summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christoph Bumiller <e0425955@student.tuwien.ac.at> 2012-04-14 21:56:56 (GMT)
committer: Christoph Bumiller <e0425955@student.tuwien.ac.at> 2012-04-14 22:08:51 (GMT)
commit: e44089b2f79aa2dcaacf348911433d1e21235c0c (patch)
tree: 955d621392f0068ef8e3c98dc46195ff3916525e
parent: 69a921892d2303f1400576aa73980c28880f8654 (diff)
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, like better placement of texture barriers and adding scheduling data to the shader instructions (without them, a thread group will be masked for 32 cycles after each single instruction issue).
-rw-r--r-- src/gallium/drivers/nouveau/nouveau_screen.h | 2
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h | 3
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir.h | 2
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 2
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp | 6
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h | 1
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp | 1
-rw-r--r-- src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp | 3
-rw-r--r-- src/gallium/drivers/nv50/nv50_screen.c | 1
-rw-r--r-- src/gallium/drivers/nv50/nv50_state.c | 7
-rw-r--r-- src/gallium/drivers/nv50/nv50_texture.xml.h | 12
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp | 12
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 74
-rw-r--r-- src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp | 49
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_3d.xml.h | 13
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.c | 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.h | 25
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c | 61
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.c | 174
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.h | 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_shader_state.c | 3
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_state_validate.c | 38
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_surface.c | 2
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_tex.c | 165
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_transfer.c | 159
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_winsys.h | 19
-rw-r--r-- src/gallium/drivers/nvc0/nve4_p2mf.xml.h | 107
-rw-r--r-- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 1
28 files changed, 799 insertions, 159 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index a278477..4ca286b 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -19,6 +19,8 @@ struct nouveau_screen {
unsigned sysmem_bindings;
+ uint16_t class_3d;
+
struct {
struct nouveau_fence *head;
struct nouveau_fence *tail;
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index d87d713..66ba61b 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -188,15 +188,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLASS 0x00009097
#define NVC1_3D_CLASS 0x00009197
#define NVC8_3D_CLASS 0x00009297
+#define NVE4_3D_CLASS 0x0000a097
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
#define NVA3_COMPUTE_CLASS 0x000085c0
#define NVC0_COMPUTE_CLASS 0x000090c0
#define NVC8_COMPUTE_CLASS 0x000092c0
+#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
+#define NVE4_P2MF_CLASS 0x0000a040
#define NV31_MPEG_CLASS 0x00003174
#define NV84_MPEG_CLASS 0x00008274
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.h b/src/gallium/drivers/nv50/codegen/nv50_ir.h
index 6ec4fc9..c299cab 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir.h
@@ -131,6 +131,7 @@ enum operation
OP_POPCNT, // bitcount(src0 & src1)
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF,
+ OP_TEXBAR,
OP_LAST
};
@@ -141,6 +142,7 @@ enum operation
#define NV50_IR_SUBOP_LDC_ISL 3
#define NV50_IR_SUBOP_SHIFT_WRAP 1
#define NV50_IR_SUBOP_EMU_PRERET 1
+#define NV50_IR_SUBOP_TEXBAR(n) n
enum DataType
{
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
index e734c5b..9632986 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h
@@ -163,6 +163,8 @@ struct nv50_ir_prog_info
uint8_t clipDistanceMask; /* mask of clip distances defined */
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
int8_t genUserClip; /* request user clip planes for ClipVertex */
+ uint16_t ucpBase; /* base address for UCPs */
+ uint8_t ucpBinding; /* constant buffer index of UCP data */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
index 4530dc2..8bd784f 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -2250,9 +2250,9 @@ Converter::handleUserClipPlanes()
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
- Value *ucp;
- ucp = mkLoad(TYPE_F32, mkSymbol(FILE_MEMORY_CONST, 15, TYPE_F32,
- i * 16 + c * 4), NULL);
+ Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding,
+ TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
+ Value *ucp = mkLoad(TYPE_F32, sym, NULL);
if (c == 0)
res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
else
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
index 4ce9deb..93e502e 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_inlines.h
@@ -40,6 +40,7 @@ static inline bool isMemoryFile(DataFile f)
return (f >= FILE_MEMORY_CONST && f <= FILE_MEMORY_LOCAL);
}
+// contrary to asTex(), this will never include SULD/SUST
static inline bool isTextureOp(operation op)
{
return (op >= OP_TEX && op <= OP_TEXCSAA);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
index 45e61c5..4652bb9 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_print.cpp
@@ -147,6 +147,7 @@ const char *operationStr[OP_LAST + 1] =
"popcnt",
"insbf",
"extbf",
+ "texbar",
"(invalid)"
};
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
index 27b9610..e3eae69 100644
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target.cpp
@@ -48,7 +48,7 @@ const uint8_t Target::operationSrcNr[OP_LAST + 1] =
1, 2, // SULD, SUST
1, 1, // DFDX, DFDY
1, 2, 2, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, // POPCNT, INSBF, EXTBF
+ 2, 3, 2, 0, // POPCNT, INSBF, EXTBF, TEXBAR
0
};
@@ -61,6 +61,7 @@ Target *Target::create(unsigned int chipset)
switch (chipset & 0xf0) {
case 0xc0:
case 0xd0:
+ case 0xe0:
return getTargetNVC0(chipset);
case 0x50:
case 0x80:
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 4bcd204..e8118d7 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -594,6 +594,7 @@ nv50_screen_create(struct nouveau_device *dev)
FAIL_SCREEN_INIT("Not a known NV50 chipset: NV%02x\n", dev->chipset);
break;
}
+ screen->base.class_3d = tesla_class;
ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
NULL, 0, &screen->tesla);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index bf55442..5b783da 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -465,6 +465,13 @@ nv50_sampler_state_create(struct pipe_context *pipe,
(nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
(nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+ if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
+ if (cso->seamless_cube_map)
+ so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+ if (!cso->normalized_coords)
+ so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+ }
+
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
so->tsc[1] |= NV50_TSC_1_MAGF_LINEAR;
diff --git a/src/gallium/drivers/nv50/nv50_texture.xml.h b/src/gallium/drivers/nv50/nv50_texture.xml.h
index 08f6efd..2b140be 100644
--- a/src/gallium/drivers/nv50/nv50_texture.xml.h
+++ b/src/gallium/drivers/nv50/nv50_texture.xml.h
@@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- rnndb/nv50_texture.xml ( 7947 bytes, from 2011-07-09 13:43:58)
-- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
-- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-07-09 13:43:58)
-- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
+- rnndb/nv50_texture.xml ( 8111 bytes, from 2012-03-31 16:47:45)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
-Copyright (C) 2006-2011 by the following authors:
+Copyright (C) 2006-2012 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
@@ -265,8 +265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_TSC_1_MIPF_NONE 0x00000040
#define NV50_TSC_1_MIPF_NEAREST 0x00000080
#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
+#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
#define NV50_TSC_1_LOD_BIAS__SHIFT 12
+#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
#define NV50_TSC_2 0x00000008
#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
index d4fd4da..912540d 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -102,6 +102,7 @@ private:
void emitSLCT(const CmpInstruction *);
void emitSELP(const Instruction *);
+ void emitTEXBAR(const Instruction *);
void emitTEX(const TexInstruction *);
void emitTEXCSAA(const TexInstruction *);
void emitTXQ(const TexInstruction *);
@@ -938,6 +939,14 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
code[1] |= 1 << 20;
}
+void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
+{
+ code[0] = 0x00000006 | (i->subOp << 26);
+ code[1] = 0xf0000000;
+ emitPredicate(i);
+ emitCondCode(i->predSrc >= 0 ? i->cc : CC_ALWAYS, 5);
+}
+
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
{
code[0] = 0x00000086;
@@ -1630,6 +1639,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn)
case OP_TXQ:
emitTXQ(insn->asTex());
break;
+ case OP_TEXBAR:
+ emitTEXBAR(insn);
+ break;
case OP_BRA:
case OP_CALL:
case OP_PRERET:
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
index bd33fbf..318d345 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -117,6 +117,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
class NVC0LegalizePostRA : public Pass
{
+public:
+ NVC0LegalizePostRA(const Program *);
+
private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
@@ -127,8 +130,15 @@ private:
void propagateJoin(BasicBlock *);
LValue *r63;
+
+ const bool needTexBar;
};
+NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
+ : needTexBar(prog->getTarget()->getChipset() >= 0xe0)
+{
+}
+
bool
NVC0LegalizePostRA::visit(Function *fn)
{
@@ -225,6 +235,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
} else
if (i->isNop()) {
bb->remove(i);
+ } else
+ if (needTexBar && isTextureOp(i->op)) {
+ Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE);
+ bar->fixed = 1;
+ bar->subOp = 0;
+ bb->insertAfter(i, bar);
} else {
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
@@ -310,7 +326,61 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int arg = i->tex.target.getArgCount();
- // generate and move the tsc/tic/array source to the front
+ if (prog->getTarget()->getChipset() >= 0xe0) {
+ if (i->tex.r == i->tex.s) {
+ i->tex.r += 8; // NOTE: offset should probably be a driver option
+ i->tex.s = 0; // only a single cX[] value possible here
+ } else {
+ // TODO: extract handles and use register to select TIC/TSC entries
+ }
+ if (i->tex.target.isArray()) {
+ LValue *layer = new_LValue(func, FILE_GPR);
+ Value *src = i->getSrc(arg - 1);
+ const int sat = (i->op == OP_TXF) ? 1 : 0;
+ DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
+ bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, layer);
+ }
+ if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
+ Value *tmp[2];
+ Symbol *bind;
+ Value *rRel = i->getIndirectR();
+ Value *sRel = i->getIndirectS();
+ Value *shCnt = bld.loadImm(NULL, 2);
+
+ if (rRel) {
+ tmp[0] = bld.getScratch();
+ bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4);
+ bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt);
+ tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
+ bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
+ bld.loadImm(tmp[0], 0x00ffffffu));
+ rRel = tmp[0];
+ i->setSrc(i->tex.rIndirectSrc, NULL);
+ }
+ if (sRel) {
+ tmp[0] = bld.getScratch();
+ bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4);
+ bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt);
+ tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]);
+ bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1],
+ bld.loadImm(tmp[0], 0xff000000u));
+ sRel = tmp[0];
+ i->setSrc(i->tex.sIndirectSrc, NULL);
+ }
+ bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel);
+
+ int min = i->tex.rIndirectSrc;
+ if (min < 0 || min > i->tex.sIndirectSrc)
+ min = i->tex.sIndirectSrc;
+ for (int s = min; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, rRel);
+ }
+ } else
+ // (nvc0) generate and move the tsc/tic/array source to the front
if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
@@ -717,7 +787,7 @@ TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const
return pass.run(prog, false, true);
} else
if (stage == CG_STAGE_POST_RA) {
- NVC0LegalizePostRA pass;
+ NVC0LegalizePostRA pass(prog);
return pass.run(prog, false, true);
} else
if (stage == CG_STAGE_SSA) {
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
index 0442562..2aa2005 100644
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -42,6 +42,7 @@ TargetNVC0::TargetNVC0(unsigned int card)
// Will probably make this nicer once we support subroutines properly,
// i.e. when we have an input IR that provides function declarations.
+// TODO: separate version for nve4+ which doesn't like the 4-byte insn formats
static const uint32_t nvc0_builtin_code[] =
{
// DIV U32: slow unsigned integer division
@@ -57,11 +58,11 @@ static const uint32_t nvc0_builtin_code[] =
//
#if 1
0x04009c03, 0x78000000,
- 0x7c209cdd,
- 0x0010dd18,
+ 0x7c209c82, 0x38000000, // 0x7c209cdd,
+ 0x0400dde2, 0x18000000, // 0x0010dd18,
0x08309c03, 0x60000000,
- 0x05605c18,
- 0x0810dc2a,
+ 0x05205d04, 0x1c000000, // 0x05605c18,
+ 0x0810dc03, 0x50000000, // 0x0810dc2a,
0x0c209c43, 0x20040000,
0x0810dc03, 0x50000000,
0x0c209c43, 0x20040000,
@@ -73,15 +74,15 @@ static const uint32_t nvc0_builtin_code[] =
0x0c209c43, 0x20040000,
0x0000dde4, 0x28000000,
0x08001c43, 0x50000000,
- 0x05609c18,
- 0x0010430d,
+ 0x05209d04, 0x1c000000, // 0x05609c18,
+ 0x00105c03, 0x20060000, // 0x0010430d,
0x0811dc03, 0x1b0e0000,
0x08104103, 0x48000000,
0x04000002, 0x08000000,
0x0811c003, 0x1b0e0000,
0x08104103, 0x48000000,
- 0x040000ac,
- 0x90001dff,
+ 0x04000002, 0x08000000, // 0x040000ac,
+ 0x00001de7, 0x90000000, // 0x90001dff,
#else
0x0401dc03, 0x1b0e0000,
0x00008003, 0x78000000,
@@ -111,27 +112,27 @@ static const uint32_t nvc0_builtin_code[] =
//
0xfc05dc23, 0x188e0000,
0xfc17dc23, 0x18c40000,
- 0x03301e18,
- 0x07305e18,
+ 0x01201ec4, 0x1c000000, // 0x03301e18,
+ 0x05205ec4, 0x1c000000, // 0x07305e18,
0x0401dc03, 0x1b0e0000,
0x00008003, 0x78000000,
0x0400c003, 0x78000000,
0x0c20c103, 0x48000000,
0x0c108003, 0x60000000,
- 0x00005c28,
- 0x00001d18,
+ 0x00005de4, 0x28000000, // 0x00005c28,
+ 0x00001de2, 0x18000000, // 0x00001d18,
0x0031c023, 0x1b0ec000,
- 0xb000a1e7, 0x40000000,
+ 0xe000a1e7, 0x40000000, // 0xb000a1e7, 0x40000000,
0x04000003, 0x6000c000,
0x0813dc03, 0x1b000000,
- 0x0420446c,
- 0x040004bd,
+ 0x04204603, 0x48000000, // 0x0420446c,
+ 0x04000442, 0x38000000, // 0x040004bd,
0x04208003, 0x5800c000,
0x0430c103, 0x4800c000,
- 0x0ffc5dff,
- 0x01700e18,
- 0x05704a18,
- 0x90001dff,
+ 0xe0001de7, 0x4003fffe, // 0x0ffc5dff,
+ 0x01200f84, 0x1c000000, // 0x01700e18,
+ 0x05204b84, 0x1c000000, // 0x05704a18,
+ 0x00001de7, 0x90000000, // 0x90001dff,
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i)
//
@@ -180,9 +181,9 @@ static const uint32_t nvc0_builtin_code[] =
static const uint16_t nvc0_builtin_offsets[NVC0_BUILTIN_COUNT] =
{
0,
- 8 * (22),
- 8 * (22 + 18),
- 8 * (22 + 18 + 9)
+ 8 * (26),
+ 8 * (26 + 23),
+ 8 * (26 + 23 + 9)
};
void
@@ -270,7 +271,7 @@ void TargetNVC0::initOpInfo()
OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT,
OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET,
OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART,
- OP_QUADON, OP_QUADPOP
+ OP_QUADON, OP_QUADPOP, OP_TEXBAR
};
joinAnterior = false;
@@ -445,6 +446,8 @@ TargetNVC0::isAccessSupported(DataFile file, DataType ty) const
{
if (ty == TYPE_NONE)
return false;
+ if (file == FILE_MEMORY_CONST && getChipset() >= 0xe0) // wrong encoding ?
+ return typeSizeof(ty) <= 4;
if (ty == TYPE_B96)
return (file == FILE_SHADER_INPUT) || (file == FILE_SHADER_OUTPUT);
return true;
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 71fa151..1cf1f96 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -94,6 +94,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+#define NVC0_3D_CACHE_SPLIT 0x00000308
+#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002
+#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
#define NVC0_3D_TESS_MODE 0x00000320
#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
@@ -289,6 +294,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64
+
#define NVC0_3D_COUNTER_ENABLE 0x00000d68
#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
@@ -727,6 +734,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_POINT_SIZE 0x00001518
+#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
#define NVC0_3D_COUNTER_RESET 0x00001530
@@ -1303,6 +1312,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
+#define NVE4_3D_TEX_CB_INDEX 0x00002608
+#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000
+#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010
+
#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 461ceb1..8abac09 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -133,10 +133,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
goto out_err;
nvc0->screen = screen;
- nvc0->base.screen = &screen->base;
- nvc0->base.copy_data = nvc0_m2mf_copy_linear;
- nvc0->base.push_data = nvc0_m2mf_push_linear;
- nvc0->base.push_cb = nvc0_cb_push;
+ nvc0->base.screen = &screen->base;
pipe->screen = pscreen;
pipe->priv = priv;
@@ -158,6 +155,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nvc0_init_query_functions(nvc0);
nvc0_init_surface_functions(nvc0);
nvc0_init_state_functions(nvc0);
+ nvc0_init_transfer_functions(nvc0);
nvc0_init_resource_functions(pipe);
nvc0->draw = draw_create(pipe);
@@ -174,7 +172,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
- BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniforms);
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 7072b59..140ce1a 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -27,7 +27,9 @@
#include "nvc0_3d.xml.h"
#include "nvc0_2d.xml.h"
#include "nvc0_m2mf.xml.h"
+#include "nve4_p2mf.xml.h"
+/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
#define NVC0_NEW_BLEND (1 << 0)
#define NVC0_NEW_RASTERIZER (1 << 1)
#define NVC0_NEW_ZSA (1 << 2)
@@ -75,6 +77,11 @@ struct nvc0_context {
struct nvc0_screen *screen;
+ void (*m2mf_copy_rect)(struct nvc0_context *,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+
uint32_t dirty;
struct {
@@ -130,6 +137,8 @@ struct nvc0_context {
unsigned num_samplers[5];
uint16_t samplers_dirty[5];
+ uint32_t tex_handles[5][PIPE_MAX_SAMPLERS]; /* for nve4 */
+
struct pipe_framebuffer_state framebuffer;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
@@ -165,7 +174,7 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *);
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
/* nvc0_program.c */
-boolean nvc0_program_translate(struct nvc0_program *);
+boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
@@ -206,6 +215,7 @@ extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
+void nve4_set_tex_handles(struct nvc0_context *);
struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context *,
@@ -214,19 +224,16 @@ nvc0_create_sampler_view(struct pipe_context *,
/* nvc0_transfer.c */
void
-nvc0_m2mf_transfer_rect(struct nvc0_context *,
- const struct nv50_m2mf_rect *dst,
- const struct nv50_m2mf_rect *src,
- uint32_t nblocksx, uint32_t nblocksy);
+nvc0_init_transfer_functions(struct nvc0_context *);
+
void
nvc0_m2mf_push_linear(struct nouveau_context *nv,
struct nouveau_bo *dst, unsigned offset, unsigned domain,
unsigned size, const void *data);
void
-nvc0_m2mf_copy_linear(struct nouveau_context *nv,
- struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
- struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
- unsigned size);
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
void
nvc0_cb_push(struct nouveau_context *,
struct nouveau_bo *bo, unsigned domain,
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index 50a853a..f228d07 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -152,7 +152,7 @@ nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
static int
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
- unsigned last = info->prop.fp.numColourResults * 4;
+ unsigned count = info->prop.fp.numColourResults * 4;
unsigned i, c;
for (i = 0; i < info->numOutputs; ++i)
@@ -161,10 +161,13 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
info->out[i].slot[c] = info->out[i].si * 4 + c;
if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
- info->out[info->io.sampleMask].slot[0] = last++;
+ info->out[info->io.sampleMask].slot[0] = count++;
+ else
+ if (info->target >= 0xe0)
+ count++; /* on Kepler, depth is always last colour reg + 2 */
if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
- info->out[info->io.fragDepth].slot[2] = last;
+ info->out[info->io.fragDepth].slot[2] = count;
return 0;
}
@@ -278,7 +281,7 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
vp->vp.clip_mode |= 1 << (i * 4);
if (info->io.genUserClip < 0)
- vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES; /* prevent rebuilding */
+ vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
return 0;
}
@@ -434,6 +437,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
{
unsigned i, c, a, m;
+ /* just 00062 on Kepler */
fp->hdr[0] = 0x20062 | (5 << 10);
fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
@@ -538,7 +542,7 @@ nvc0_program_dump(struct nvc0_program *prog)
#endif
boolean
-nvc0_program_translate(struct nvc0_program *prog)
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
int ret;
@@ -548,11 +552,13 @@ nvc0_program_translate(struct nvc0_program *prog)
return FALSE;
info->type = prog->type;
- info->target = 0xc0;
+ info->target = chipset;
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
info->io.genUserClip = prog->vp.num_ucps;
+ info->io.ucpBase = 256;
+ info->io.ucpBinding = 15;
info->assignSlots = nvc0_program_assign_varying_slots;
@@ -655,7 +661,13 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
size = align(size, 0x40);
size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
}
- size = align(size, 0x40); /* required by SP_START_ID */
+ /* On Fermi, SP_START_ID must be aligned to 0x40.
+ * On Kepler, the first instruction must be aligned to 0x80 because
+ * latency information is expected only at certain positions.
+ */
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ size = size + 0x70;
+ size = align(size, 0x40);
ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
if (ret) {
@@ -667,6 +679,17 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
prog->mem->start + prog->mem->size));
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ switch (prog->mem->start & 0xff) {
+ case 0x40: prog->code_base += 0x70; break;
+ case 0x80: prog->code_base += 0x30; break;
+ case 0xc0: prog->code_base += 0x70; break;
+ default:
+ prog->code_base += 0x30;
+ assert((prog->mem->start & 0xff) == 0x00);
+ break;
+ }
+ }
code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
if (prog->relocs)
@@ -677,18 +700,18 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
nvc0_program_dump(prog);
#endif
- nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
- nvc0_m2mf_push_linear(&nvc0->base, screen->text,
- prog->code_base + NVC0_SHADER_HEADER_SIZE,
- NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0->base.push_data(&nvc0->base, screen->text,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
if (prog->immd_size)
- nvc0_m2mf_push_linear(&nvc0->base,
- screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
- prog->immd_size, prog->immd_data);
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
- PUSH_DATA (nvc0->base.pushbuf, 0x1111);
+ PUSH_DATA (nvc0->base.pushbuf, 0x1011);
return TRUE;
}
@@ -714,9 +737,9 @@ nvc0_program_library_upload(struct nvc0_context *nvc0)
if (ret)
return;
- nvc0_m2mf_push_linear(&nvc0->base,
- screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
- size, code);
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
/* no need for a memory barrier, will be emitted with first program */
}
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index bad06c3..eb8a9c5 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -30,7 +30,6 @@
#include "nvc0_context.h"
#include "nvc0_screen.h"
-#include "nouveau/nv_object.xml.h"
#include "nvc0_graph_macros.h"
static boolean
@@ -67,6 +66,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
static int
nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 16 * PIPE_SHADER_TYPES; /* NOTE: should not count COMPUTE */
@@ -89,7 +90,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- return 0;
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
@@ -247,10 +248,11 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
FREE(screen->blitctx);
nouveau_bo_ref(NULL, &screen->text);
+ nouveau_bo_ref(NULL, &screen->uniform_bo);
nouveau_bo_ref(NULL, &screen->tls);
nouveau_bo_ref(NULL, &screen->txc);
nouveau_bo_ref(NULL, &screen->fence.bo);
- nouveau_bo_ref(NULL, &screen->vfetch_cache);
+ nouveau_bo_ref(NULL, &screen->poly_cache);
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
@@ -260,7 +262,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_mm_destroy(screen->mm_VRAM_fe0);
- nouveau_object_del(&screen->fermi);
+ nouveau_object_del(&screen->eng3d);
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
@@ -288,16 +290,16 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
}
static void
-nvc0_magic_3d_init(struct nouveau_pushbuf *push)
+nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
{
BEGIN_NVC0(push, SUBC_3D(0x10cc), 1);
PUSH_DATA (push, 0xff);
BEGIN_NVC0(push, SUBC_3D(0x10e0), 2);
- PUSH_DATA(push, 0xff);
- PUSH_DATA(push, 0xff);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
- PUSH_DATA(push, 0xff);
- PUSH_DATA(push, 0xff);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
PUSH_DATA (push, 0x3f);
@@ -308,11 +310,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
PUSH_DATA (push, 1);
-#if 0 /* software method */
- BEGIN_NVC0(push, SUBC_3D(0x1528), 1); /* MP poke */
- PUSH_DATA (push, 0);
-#endif
-
BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, SUBC_3D(0x0218), 1);
@@ -324,8 +321,6 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
BEGIN_NVC0(push, SUBC_3D(0x12d8), 2);
PUSH_DATA (push, 0x10);
PUSH_DATA (push, 0x10);
- BEGIN_NVC0(push, SUBC_3D(0x06d4), 1);
- PUSH_DATA (push, 8);
BEGIN_NVC0(push, SUBC_3D(0x1140), 1);
PUSH_DATA (push, 0x10);
BEGIN_NVC0(push, SUBC_3D(0x1610), 1);
@@ -333,24 +328,27 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push)
BEGIN_NVC0(push, SUBC_3D(0x164c), 1);
PUSH_DATA (push, 1 << 12);
- BEGIN_NVC0(push, SUBC_3D(0x151c), 1);
- PUSH_DATA (push, 1);
BEGIN_NVC0(push, SUBC_3D(0x030c), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
PUSH_DATA (push, 3);
-#if 0 /* software method */
- BEGIN_NVC0(push, SUBC_3D(0x1280), 1); /* PGRAPH poke */
- PUSH_DATA (push, 0);
-#endif
+
BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
- PUSH_DATA (push, 0x1f40);
+ PUSH_DATA (push, 0x3fffff);
BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
PUSH_DATA (push, 1);
BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
PUSH_DATA (push, 1);
BEGIN_NVC0(push, SUBC_3D(0x075c), 1);
PUSH_DATA (push, 3);
+
+ if (obj_class >= NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x07fc), 1);
+ PUSH_DATA (push, 1);
+ }
+
+ /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc
+ * are supposed to do */
}
static void
@@ -391,10 +389,20 @@ nvc0_screen_create(struct nouveau_device *dev)
struct pipe_screen *pscreen;
struct nouveau_object *chan;
struct nouveau_pushbuf *push;
+ uint32_t obj_class;
int ret;
unsigned i;
union nouveau_bo_config mm_config;
+ switch (dev->chipset & ~0xf) {
+ case 0xc0:
+ case 0xd0:
+ case 0xe0:
+ break;
+ default:
+ return NULL;
+ }
+
screen = CALLOC_STRUCT(nvc0_screen);
if (!screen)
return NULL;
@@ -431,17 +439,25 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.fence.emit = nvc0_screen_fence_emit;
screen->base.fence.update = nvc0_screen_fence_update;
- ret = nouveau_object_new(chan, 0xbeef9039, NVC0_M2MF_CLASS, NULL, 0,
+ switch (dev->chipset & 0xf0) {
+ case 0xe0:
+ obj_class = NVE4_P2MF_CLASS;
+ break;
+ default:
+ obj_class = NVC0_M2MF_CLASS;
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
&screen->m2mf);
if (ret)
FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->m2mf->oclass);
- BEGIN_NVC0(push, NVC0_M2MF(NOTIFY_ADDRESS_HIGH), 3);
- PUSH_DATAh(push, screen->fence.bo->offset + 16);
- PUSH_DATA (push, screen->fence.bo->offset + 16);
- PUSH_DATA (push, 0);
+ if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
+ BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, 0xa0b5);
+ }
ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
&screen->eng2d);
@@ -461,17 +477,39 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
PUSH_DATA (push, 1);
- ret = nouveau_object_new(chan, 0xbeef9097, NVC0_3D_CLASS, NULL, 0,
- &screen->fermi);
+ BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->fence.bo->offset + 16);
+ PUSH_DATA (push, screen->fence.bo->offset + 16);
+
+ switch (dev->chipset & 0xf0) {
+ case 0xe0:
+ obj_class = NVE4_3D_CLASS;
+ break;
+ case 0xd0:
+ case 0xc0:
+ default:
+ switch (dev->chipset) {
+ case 0xd9:
+ case 0xc8:
+ obj_class = NVC8_3D_CLASS;
+ break;
+ case 0xc1:
+ obj_class = NVC1_3D_CLASS;
+ break;
+ default:
+ obj_class = NVC0_3D_CLASS;
+ break;
+ }
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
+ &screen->eng3d);
if (ret)
FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+ screen->base.class_3d = obj_class;
BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
- PUSH_DATA (push, screen->fermi->oclass);
- BEGIN_NVC0(push, NVC0_3D(NOTIFY_ADDRESS_HIGH), 3);
- PUSH_DATAh(push, screen->fence.bo->offset + 32);
- PUSH_DATA (push, screen->fence.bo->offset + 32);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, screen->eng3d->oclass);
BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
@@ -501,10 +539,23 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
- PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ if (screen->eng3d->oclass < NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
+ PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ } else {
+ BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 15);
+ }
+ BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1);
+ PUSH_DATA (push, 8); /* 128 */
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ if (screen->eng3d->oclass >= NVC1_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1);
+ PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1);
+ }
- nvc0_magic_3d_init(push);
+ nvc0_magic_3d_init(push, screen->eng3d->oclass);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
&screen->text);
@@ -517,21 +568,41 @@ nvc0_screen_create(struct nouveau_device *dev)
nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
- &screen->uniforms);
+ &screen->uniform_bo);
if (ret)
goto fail;
- /* auxiliary constants (6 user clip planes, base instance id) */
- BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniforms->offset + (5 << 16));
- PUSH_DATA (push, screen->uniforms->offset + (5 << 16));
for (i = 0; i < 5; ++i) {
+ /* TIC and TSC entries for each unit (nve4+ only) */
+ /* auxiliary constants (6 user clip planes, base instance id) */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
+ if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
+ unsigned j;
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+ PUSH_DATA (push, 0);
+ for (j = 0; j < 8; ++j)
+ PUSH_DATA(push, j);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
+ PUSH_DATA (push, 0x54);
+ }
}
+ BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ /* max MPs * max warps per MP (TODO: ask kernel) */
+ if (screen->eng3d->oclass >= NVE4_3D_CLASS)
+ screen->tls_size = 8 * 64;
+ else
+ screen->tls_size = 16 * 48;
+ screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
+ screen->tls_size = align(screen->tls_size, 1 << 17);
- screen->tls_size = (16 * 32) * (NVC0_CAP_MAX_PROGRAM_TEMPS * 16);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
screen->tls_size, NULL, &screen->tls);
if (ret)
@@ -550,21 +621,14 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
PUSH_DATA (push, 0);
- for (i = 0; i < 5; ++i) {
- BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
- PUSH_DATA (push, 0x54);
- }
- BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
- PUSH_DATA (push, 0);
-
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
- &screen->vfetch_cache);
+ &screen->poly_cache);
if (ret)
goto fail;
BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
- PUSH_DATAh(push, screen->vfetch_cache->offset);
- PUSH_DATA (push, screen->vfetch_cache->offset);
+ PUSH_DATAh(push, screen->poly_cache->offset);
+ PUSH_DATA (push, screen->poly_cache->offset);
PUSH_DATA (push, 3);
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index e0f5e5e..8bcc147 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -6,6 +6,8 @@
#include "nouveau/nouveau_fence.h"
#include "nouveau/nouveau_heap.h"
+#include "nouveau/nv_object.xml.h"
+
#include "nvc0_winsys.h"
#include "nvc0_stateobj.h"
@@ -24,10 +26,10 @@ struct nvc0_screen {
int num_occlusion_queries_active;
struct nouveau_bo *text;
- struct nouveau_bo *uniforms;
+ struct nouveau_bo *uniform_bo;
struct nouveau_bo *tls;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
- struct nouveau_bo *vfetch_cache;
+ struct nouveau_bo *poly_cache;
uint64_t tls_size;
@@ -55,7 +57,7 @@ struct nvc0_screen {
struct nouveau_mman *mm_VRAM_fe0;
- struct nouveau_object *fermi;
+ struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
struct nouveau_object *dijkstra;
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 54dfd8d..786889f 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -70,7 +70,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
return TRUE;
if (!prog->translated) {
- prog->translated = nvc0_program_translate(prog);
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
if (!prog->translated)
return FALSE;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 3533a5e..5d34f2b 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -250,17 +250,17 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
}
static INLINE void
-nvc0_upload_uclip_planes(struct nvc0_context *nvc0)
+nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nouveau_bo *bo = nvc0->screen->uniforms;
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 256);
- PUSH_DATAh(push, bo->offset + (5 << 16));
- PUSH_DATA (push, bo->offset + (5 << 16));
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
@@ -289,21 +289,28 @@ nvc0_validate_clip(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *vp;
+ unsigned stage;
uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;
- if (nvc0->dirty & NVC0_NEW_CLIP)
- nvc0_upload_uclip_planes(nvc0);
-
- vp = nvc0->gmtyprog;
- if (!vp) {
+ if (nvc0->gmtyprog) {
+ stage = 3;
+ vp = nvc0->gmtyprog;
+ } else
+ if (nvc0->tevlprog) {
+ stage = 2;
vp = nvc0->tevlprog;
- if (!vp)
- vp = nvc0->vertprog;
+ } else {
+ stage = 0;
+ vp = nvc0->vertprog;
}
if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
nvc0_check_program_ucps(nvc0, vp, clip_enable);
+ if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
+ if (vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
+ nvc0_upload_uclip_planes(nvc0, stage);
+
clip_enable &= vp->vp.clip_enable;
if (nvc0->state.clip_enable != clip_enable) {
@@ -375,7 +382,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
if (!nouveau_resource_mapped_by_gpu(&res->base)) {
if (i == 0 && (res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY)) {
base = s << 16;
- bo = nvc0->screen->uniforms;
+ bo = nvc0->screen->uniform_bo;
if (nvc0->state.uniform_buffer_bound[s] >= res->base.width0)
rebind = FALSE;
@@ -396,7 +403,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
nvc0->state.uniform_buffer_bound[s] = 0;
}
- if (bo != nvc0->screen->uniforms)
+ if (bo != nvc0->screen->uniform_bo)
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
if (rebind) {
@@ -517,6 +524,7 @@ static struct state_validate {
{ nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
{ nvc0_validate_textures, NVC0_NEW_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
+ { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 3378b51..2b47c04 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -233,7 +233,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
src_box->x, src_box->y, src_box->z);
for (i = 0; i < src_box->depth; ++i) {
- nvc0_m2mf_transfer_rect(nvc0, &drect, &srect, nx, ny);
+ nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
if (nv50_miptree(dst)->layout_3d)
drect.z++;
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index f6c4ab3..8dd7185 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -26,6 +26,9 @@
#include "util/u_format.h"
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+#define NVE4_TSC_ENTRY_INVALID 0xfff00000
+
#define NV50_TIC_0_SWIZZLE__MASK \
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
@@ -271,13 +274,76 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
return need_flush;
}
+/* NVE4+ variant of TIC validation for shader stage s (selected by
+ * nvc0_validate_textures when class_3d >= NVE4_3D_CLASS).
+ *
+ * For every bound texture view of the stage:
+ *  - a view without a TIC slot (id < 0) gets one from nvc0_screen_tic_alloc
+ *    and its 8 descriptor words are written to txc->offset + id * 32 with
+ *    inline P2MF data;
+ *  - the TIC id is stored in the low 20 bits of tex_handles[s][i]
+ *    (the bits covered by NVE4_TIC_ENTRY_INVALID);
+ *  - unbound slots get all of those bits set, marking the entry invalid.
+ *
+ * Returns TRUE if at least one descriptor was uploaded; the caller then
+ * emits TIC_FLUSH.
+ */
+static boolean
+nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
+{
+   struct nouveau_bo *txc = nvc0->screen->txc;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_textures[s]; ++i) {
+      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+      struct nv04_resource *res;
+      /* unsigned shift: a signed 1 << 31 would be undefined behaviour */
+      const boolean dirty = !!(nvc0->textures_dirty[s] & (1u << i));
+
+      if (!tic) {
+         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+         continue;
+      }
+      res = nv04_resource(tic->pipe.texture);
+
+      if (tic->id < 0) {
+         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+         /* 3 + 3 + 10 words follow: address, line size/count, exec + data */
+         PUSH_SPACE(push, 16);
+         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+         PUSH_DATAh(push, txc->offset + (tic->id * 32));
+         PUSH_DATA (push, txc->offset + (tic->id * 32));
+         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+         PUSH_DATA (push, 32);
+         PUSH_DATA (push, 1);
+         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+         PUSH_DATA (push, NVE4_P2MF_EXEC_LINEAR | NVE4_P2MF_EXEC_UNK12);
+         PUSH_DATAp(push, &tic->tic[0], 8);
+
+         need_flush = TRUE;
+      } else
+      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+         /* resource was written by the GPU since the entry was uploaded;
+          * presumably this invalidates cached texture data for the entry
+          * (TODO confirm TEX_CACHE_CTL semantics) */
+         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, (tic->id << 4) | 1);
+      }
+      /* set the slot's bit in the lock bitmap; the id may be 31 mod 32, so
+       * the shift must be done on an unsigned operand */
+      nvc0->screen->tic.lock[tic->id / 32] |= 1u << (tic->id % 32);
+
+      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+      nvc0->tex_handles[s][i] |= tic->id;
+      if (dirty)
+         BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+   }
+   /* slots that were bound at the last validation but are not any more */
+   for (; i < nvc0->state.num_textures[s]; ++i)
+      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+
+   nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+   return need_flush;
+}
+
void nvc0_validate_textures(struct nvc0_context *nvc0)
{
boolean need_flush;
- need_flush = nvc0_validate_tic(nvc0, 0);
- need_flush |= nvc0_validate_tic(nvc0, 3);
- need_flush |= nvc0_validate_tic(nvc0, 4);
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tic(nvc0, 0);
+ need_flush |= nve4_validate_tic(nvc0, 3);
+ need_flush |= nve4_validate_tic(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 3);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+ }
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
@@ -329,16 +395,103 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
+/* NVE4+ variant of TSC (sampler) validation for shader stage s (selected by
+ * nvc0_validate_samplers when class_3d >= NVE4_3D_CLASS).
+ *
+ * A sampler without a TSC slot (id < 0) gets one from nvc0_screen_tsc_alloc
+ * and its 8 descriptor words are written to txc->offset + 65536 + id * 32
+ * with inline P2MF data. The TSC id is stored in bits 20 and up of
+ * tex_handles[s][i] (the bits covered by NVE4_TSC_ENTRY_INVALID); unbound
+ * slots get all of those bits set, marking the entry invalid.
+ *
+ * Returns TRUE if at least one descriptor was uploaded; the caller then
+ * emits TSC_FLUSH.
+ */
+static boolean
+nve4_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+   struct nouveau_bo *txc = nvc0->screen->txc;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+      if (!tsc) {
+         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+         continue;
+      }
+      if (tsc->id < 0) {
+         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+         /* 3 + 3 + 10 words follow: address, line size/count, exec + data */
+         PUSH_SPACE(push, 16);
+         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+         PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
+         PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
+         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+         PUSH_DATA (push, 32);
+         PUSH_DATA (push, 1);
+         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+         PUSH_DATA (push, NVE4_P2MF_EXEC_LINEAR | NVE4_P2MF_EXEC_UNK12);
+         PUSH_DATAp(push, &tsc->tsc[0], 8);
+
+         need_flush = TRUE;
+      }
+      /* set the slot's bit in the lock bitmap; the id may be 31 mod 32, so
+       * the shift must be done on an unsigned operand */
+      nvc0->screen->tsc.lock[tsc->id / 32] |= 1u << (tsc->id % 32);
+
+      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
+      nvc0->tex_handles[s][i] |= tsc->id << 20;
+   }
+   /* slots that were bound at the last validation but are not any more */
+   for (; i < nvc0->state.num_samplers[s]; ++i)
+      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+
+   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+   return need_flush;
+}
+
void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
boolean need_flush;
- need_flush = nvc0_validate_tsc(nvc0, 0);
- need_flush |= nvc0_validate_tsc(nvc0, 3);
- need_flush |= nvc0_validate_tsc(nvc0, 4);
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tsc(nvc0, 0);
+ need_flush |= nve4_validate_tsc(nvc0, 3);
+ need_flush |= nve4_validate_tsc(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 3);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+ }
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
}
+
+/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
+ * At some point we might want to get a list of the combinations used by a
+ * shader and fill in those entries instead of having it extract the handles.
+ *
+ * Does nothing on pre-NVE4 classes. For each of the 5 stages with dirty
+ * textures or samplers, the stage's 512-byte auxiliary constant buffer
+ * (uniform_bo + (5 << 16) + s * 512) is selected and tex_handles[s][i] is
+ * written at byte offset (8 + i) * 4 for every dirty slot i. The per-stage
+ * dirty masks are cleared afterwards.
+ */
+void
+nve4_set_tex_handles(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   uint64_t address;
+   unsigned s;
+
+   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+      return;
+   address = nvc0->screen->uniform_bo->offset + (5 << 16);
+
+   for (s = 0; s < 5; ++s, address += (1 << 9)) {
+      uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+      if (!dirty)
+         continue;
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, 512);
+      PUSH_DATAh(push, address);
+      PUSH_DATA (push, address);
+      do {
+         /* dirty is non-zero here, so ffs() returns 1..32 */
+         int i = ffs(dirty) - 1;
+         /* unsigned shift: a signed 1 << 31 would be undefined behaviour */
+         dirty &= ~(1u << i);
+
+         BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
+         PUSH_DATA (push, (8 + i) * 4);
+         PUSH_DATA (push, nvc0->tex_handles[s][i]);
+      } while (dirty);
+
+      nvc0->textures_dirty[s] = 0;
+      nvc0->samplers_dirty[s] = 0;
+   }
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_transfer.c b/src/gallium/drivers/nvc0/nvc0_transfer.c
index 774793d..fb44190 100644
--- a/src/gallium/drivers/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nvc0/nvc0_transfer.c
@@ -13,7 +13,7 @@ struct nvc0_transfer {
uint16_t nlayers;
};
-void
+static void
nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
const struct nv50_m2mf_rect *dst,
const struct nv50_m2mf_rect *src,
@@ -108,6 +108,71 @@ nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
nouveau_bufctx_reset(bctx, 0);
}
+/* Copy a rectangle of nblocksx x nblocksy blocks from src to dst by pushing
+ * methods on the COPY subchannel.
+ *
+ * dst and src must use the same cpp (asserted). A surface whose bo reports
+ * memtype 0 is handled as pitch-linear: its x/y origin is folded into the
+ * base offset and the matching *_MODE_2D_LINEAR bit is set in the exec word;
+ * such a surface must have z == 0 (asserted).
+ * Both bos are referenced through nvc0->bufctx for the duration of the call.
+ */
+static void
+nve4_m2mf_transfer_rect(struct nvc0_context *nvc0,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bufctx *bctx = nvc0->bufctx;
+ uint32_t exec;
+ uint32_t src_base = src->base;
+ uint32_t dst_base = dst->base;
+ const int cpp = dst->cpp;
+
+ assert(dst->cpp == src->cpp);
+
+ /* reference dst for writing and src for reading, then validate */
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */;
+
+ if (!nouveau_bo_memtype(dst->bo)) {
+ assert(!dst->z);
+ dst_base += dst->y * dst->pitch + dst->x * cpp;
+ exec |= 0x100; /* DST_MODE_2D_LINEAR */
+ }
+ if (!nouveau_bo_memtype(src->bo)) {
+ assert(!src->z);
+ src_base += src->y * src->pitch + src->x * cpp;
+ exec |= 0x080; /* SRC_MODE_2D_LINEAR */
+ }
+
+ /* destination surface: tile mode, pitch, height, depth, z, packed y/x */
+ BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+ PUSH_DATA (push, 0x1000 | dst->tile_mode);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp));
+
+ /* source surface, same layout as the destination block above */
+ BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+ PUSH_DATA (push, 0x1000 | src->tile_mode);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ PUSH_DATA (push, (src->y << 16) | (src->x * cpp));
+
+ /* src/dst addresses and pitches, then line length (nblocksx * cpp) and
+ * line count (nblocksy) */
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+ PUSH_DATAh(push, src->bo->offset + src_base);
+ PUSH_DATA (push, src->bo->offset + src_base);
+ PUSH_DATAh(push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, nblocksy);
+
+ /* NOTE(review): writing the exec word presumably launches the copy —
+ * confirm against the copy engine method documentation */
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, exec);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
void
nvc0_m2mf_push_linear(struct nouveau_context *nv,
struct nouveau_bo *dst, unsigned offset, unsigned domain,
@@ -154,6 +219,49 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
}
+/* Write `size` bytes from `data` to `dst` at byte `offset` by placing the
+ * data inline in the command stream with P2MF methods.
+ *
+ * size is rounded up to whole 32-bit words, so when it is not a multiple of
+ * 4 up to 3 bytes past the end of `data` are read and written as well
+ * (callers presumably pass word-sized uploads — TODO confirm).
+ * The upload is split into chunks bounded by the space available in the
+ * pushbuf and by NV04_PFIFO_MAX_PACKET_LEN - 1 data words. If PUSH_SPACE
+ * fails, the loop stops and the remaining words are not uploaded.
+ */
 void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ /* 8 words are used by the three method headers and the address, size
+ * and exec arguments; the rest is available for data */
+ nr = MIN2(count, nr - 8);
+ nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
+
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, nr * 4);
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1);
+ /* 0x1001 == NVE4_P2MF_EXEC_LINEAR | NVE4_P2MF_EXEC_UNK12 */
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, src, nr);
+
+ count -= nr;
+ src += nr;
+ offset += nr * 4;
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
nvc0_m2mf_copy_linear(struct nouveau_context *nv,
struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
@@ -191,6 +299,32 @@ nvc0_m2mf_copy_linear(struct nouveau_context *nv,
nouveau_bufctx_reset(bctx, 0);
}
+/* Copy `size` bytes from src + srcoff to dst + dstoff by pushing methods on
+ * the COPY subchannel. Both bos are referenced through the context's bufctx
+ * (src for reading, dst for writing) for the duration of the call.
+ */
+static void
+nve4_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ /* source address followed by destination address (high word first) */
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 4);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATA (push, src->offset + srcoff);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ /* number of bytes to copy */
+ BEGIN_NVC0(push, SUBC_COPY(0x0418), 1);
+ PUSH_DATA (push, size);
+ /* NOTE(review): 0x0300 is the same exec method used by the rect copy;
+ * 0x6 is the value labelled UNK there — presumably this launches a plain
+ * linear copy, confirm */
+ IMMED_NVC0(push, SUBC_COPY(0x0300), 0x6);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
struct pipe_transfer *
nvc0_miptree_transfer_new(struct pipe_context *pctx,
struct pipe_resource *res,
@@ -253,8 +387,8 @@ nvc0_miptree_transfer_new(struct pipe_context *pctx,
unsigned z = tx->rect[0].z;
unsigned i;
for (i = 0; i < tx->nlayers; ++i) {
- nvc0_m2mf_transfer_rect(nvc0, &tx->rect[1], &tx->rect[0],
- tx->nblocksx, tx->nblocksy);
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
tx->rect[0].z++;
else
@@ -280,8 +414,8 @@ nvc0_miptree_transfer_del(struct pipe_context *pctx,
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
for (i = 0; i < tx->nlayers; ++i) {
- nvc0_m2mf_transfer_rect(nvc0, &tx->rect[0], &tx->rect[1],
- tx->nblocksx, tx->nblocksy);
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
if (mt->layout_3d)
tx->rect[0].z++;
else
@@ -362,3 +496,18 @@ nvc0_cb_push(struct nouveau_context *nv,
nouveau_bufctx_reset(bctx, 0);
}
+
+/* Install the transfer callbacks of the context.
+ *
+ * The constant-buffer push hook is the same for every chipset; the rect
+ * copy, linear copy and linear push hooks use the nve4_* implementations
+ * when the screen's 3D class is NVE4_3D_CLASS or newer and the nvc0_* ones
+ * otherwise.
+ */
+void
+nvc0_init_transfer_functions(struct nvc0_context *nvc0)
+{
+   const boolean is_nve4 = nvc0->screen->base.class_3d >= NVE4_3D_CLASS;
+
+   nvc0->base.push_cb = nvc0_cb_push;
+
+   if (!is_nve4) {
+      nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect;
+      nvc0->base.copy_data = nvc0_m2mf_copy_linear;
+      nvc0->base.push_data = nvc0_m2mf_push_linear;
+      return;
+   }
+
+   nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect;
+   nvc0->base.copy_data = nve4_m2mf_copy_linear;
+   nvc0->base.push_data = nve4_p2mf_push_linear;
+}
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
index c3ab1c9..c13ebd5 100644
--- a/src/gallium/drivers/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -46,17 +46,24 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
}
-#define SUBC_3D(m) 1, (m)
+#define SUBC_3D(m) 0, (m)
#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
+#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
-#define SUBC_2D(m) 2, (m)
-#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n)
+#define SUBC_COMPUTE(m) 1, (m)
+#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
+#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
-#define SUBC_M2MF(m) 3, (m)
+#define SUBC_M2MF(m) 2, (m)
+#define SUBC_P2MF(m) 2, (m)
#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n)
+#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n)
-#define SUBC_COMPUTE(m) 4, (m)
-#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
+#define SUBC_2D(m) 3, (m)
+#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n)
+
+#define SUBC_COPY(m) 4, (m)
+/* NVE4_COPY(FOO) expands to SUBC_COPY(NVE4_COPY_FOO); the parameter must be
+ * named n because the body pastes ##n (it was declared as m, so the macro
+ * could never expand to a valid method name). */
+#define NVE4_COPY(n) SUBC_COPY(NVE4_COPY_##n)
static INLINE uint32_t
NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
diff --git a/src/gallium/drivers/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h
new file mode 100644
index 0000000..68a742f
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nve4_p2mf.xml.h
@@ -0,0 +1,107 @@
+#ifndef RNNDB_NVE4_P2MF_XML
+#define RNNDB_NVE4_P2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24)
+- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
+
+Copyright (C) 2006-2012 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180
+
+#define NVE4_P2MF_LINE_COUNT 0x00000184
+
+#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188
+
+#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c
+
+#define NVE4_P2MF_DST_TILE_MODE 0x00000194
+
+#define NVE4_P2MF_DST_PITCH 0x00000198
+
+#define NVE4_P2MF_DST_HEIGHT 0x0000019c
+
+#define NVE4_P2MF_DST_DEPTH 0x000001a0
+
+#define NVE4_P2MF_DST_Z 0x000001a4
+
+#define NVE4_P2MF_DST_X 0x000001a8
+
+#define NVE4_P2MF_DST_Y 0x000001ac
+
+#define NVE4_P2MF_EXEC 0x000001b0
+#define NVE4_P2MF_EXEC_LINEAR 0x00000001
+#define NVE4_P2MF_EXEC_UNK12 0x00001000
+
+#define NVE4_P2MF_DATA 0x000001b4
+
+
+#endif /* RNNDB_NVE4_P2MF_XML */
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 91d51c0..bf99014 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -34,6 +34,7 @@ nouveau_drm_screen_create(int fd)
break;
case 0xc0:
case 0xd0:
+ case 0xe0:
init = nvc0_screen_create;
break;
default: