summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu> 2017-08-16 00:34:43 -0400
committer: Ilia Mirkin <imirkin@alum.mit.edu> 2017-12-19 23:09:19 -0500
commit: 0cf6320eb5eca1ea20906624ad5a46ca386e0aa6 (patch)
tree: 7d3e1af82e6069c4c9ac1ae9c195af94da415200 /src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
parent: 22ceb1f99b5ae62d23200f4fecea8b62ab745318 (diff)
nvc0/ir: change textureGrad to always use lane 0 as the tex origin
Thanks to Karol Herbst for the debugging / tracing work that led to this change.

Move to using lane 0 as the "work" lane for the texture. It is unclear why this helps, as that computation should be identical to doing it in the "correct" lane with the properly adjusted quadops.

In order to be able to use the lane 0 result, we also have to ensure that lane 0 contains the proper array/indirect/shadow values.

This applies to Fermi and Kepler. Maxwell+ may or may not need fixing, but that lowering logic is separate.

Fixes KHR-GL45.texture_cube_map_array.sampling

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 60
1 files changed, 46 insertions, 14 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 6b51b7607cb..51f6fae2c1f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1081,15 +1081,20 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
bool
NVC0LoweringPass::handleManualTXD(TexInstruction *i)
{
- static const uint8_t qOps[4][2] =
- {
- { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
- { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
- { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
- { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
- };
+ // Always done from the l0 perspective. This is the way that NVIDIA's
+ // driver does it, and doing it from the "current" lane's perspective
+ // doesn't seem to always work for reasons that aren't altogether clear,
+ // even in frag shaders.
+ //
+ // Note that we must move not only the coordinates into lane0, but also all
+ // ancillary arguments, like array indices and depth compare as they may
+ // differ between lanes. Offsets for TXD are supposed to be uniform, so we
+ // leave them alone.
+ static const uint8_t qOps[2] =
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) };
+
Value *def[4][4];
- Value *crd[3];
+ Value *crd[3], *arr[2], *shadow;
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
@@ -1100,7 +1105,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
// indirect are both in the leading arg, while for Kepler, array and
// indirect are separate (and both precede the coordinates). Maxwell is
// handled in a separate function.
- unsigned array;
+ int array;
if (targ->getChipset() < NVISA_GK104_CHIPSET)
array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0;
else
@@ -1110,19 +1115,34 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();
+ for (c = 0; c < array; ++c)
+ arr[c] = bld.getScratch();
+ shadow = bld.getScratch();
- bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
Value *src[3], *val;
- // mov coordinates from lane l to all lanes
+
+ bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
+ // we're using the texture result from lane 0 in all cases, so make sure
+ // that lane 0 is pointing at the proper array index, indirect value,
+ // and depth compare.
+ if (l != 0) {
+ for (c = 0; c < array; ++c)
+ bld.mkQuadop(0x00, arr[c], l, i->getSrc(c), zero);
+ if (i->tex.target.isShadow()) {
+ // The next argument after coords is the depth compare
+ bld.mkQuadop(0x00, shadow, l, i->getSrc(array + dim), zero);
+ }
+ }
+ // mov position coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
// add dPdx from lane l to lanes dx
for (c = 0; c < dim; ++c)
- bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
+ bld.mkQuadop(qOps[0], crd[c], l, i->dPdx[c].get(), crd[c]);
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
- bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ bld.mkQuadop(qOps[1], crd[c], l, i->dPdy[c].get(), crd[c]);
// normalize cube coordinates
if (i->tex.target.isCube()) {
for (c = 0; c < 3; ++c)
@@ -1139,8 +1159,21 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
}
// texture
bld.insert(tex = cloneForward(func, i));
+ if (l != 0) {
+ for (c = 0; c < array; ++c)
+ tex->setSrc(c, arr[c]);
+ if (i->tex.target.isShadow())
+ tex->setSrc(array + dim, shadow);
+ }
for (c = 0; c < dim; ++c)
tex->setSrc(c + array, src[c]);
+ // broadcast results from lane 0 to all lanes so that the moves *into*
+ // the target lane pick up the proper value.
+ if (l != 0)
+ for (c = 0; i->defExists(c); ++c)
+ bld.mkQuadop(0x00, tex->getDef(c), 0, tex->getDef(c), zero);
+ bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
+
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -1150,7 +1183,6 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
mov->lanes = 1 << l;
}
}
- bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
for (c = 0; i->defExists(c); ++c) {
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));