summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp208
1 files changed, 158 insertions, 50 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1f702a987d8..a76d6c60cda 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
{
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
+ // We don't upload surface info for bindless for GM107+
+ assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET);
+
if (ptr) {
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
if (bindless)
@@ -2204,7 +2207,7 @@ getDestType(const ImgType type) {
}
void
-NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
+NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded)
{
const TexInstruction::ImgFormatDesc *format = su->tex.format;
int width = format->bits[0] + format->bits[1] +
@@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
if (width < 32)
untypedDst[0] = bld.getSSA();
- for (int i = 0; i < 4; i++) {
- typedDst[i] = su->getDef(i);
+ if (loaded && loaded[0]) {
+ for (int i = 0; i < 4; i++) {
+ if (loaded[i])
+ typedDst[i] = loaded[i]->getDef(0);
+ }
+ } else {
+ for (int i = 0; i < 4; i++) {
+ typedDst[i] = su->getDef(i);
+ }
}
// Set the untyped dsts as the su's destinations
- for (int i = 0; i < 4; i++)
- su->setDef(i, untypedDst[i]);
+ if (loaded && loaded[0]) {
+ for (int i = 0; i < 4; i++)
+ if (loaded[i])
+ loaded[i]->setDef(0, untypedDst[i]);
+ } else {
+ for (int i = 0; i < 4; i++)
+ su->setDef(i, untypedDst[i]);
- bld.setPosition(su, true);
+ bld.setPosition(su, true);
+ }
// Unpack each component into the typed dsts
int bits = 0;
for (int i = 0; i < 4; bits += format->bits[i], i++) {
if (!typedDst[i])
continue;
+
+ if (loaded && loaded[0])
+ bld.setPosition(loaded[i], true);
+
if (i >= format->components) {
if (format->type == FLOAT ||
format->type == UNORM ||
@@ -2308,7 +2328,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
processSurfaceCoordsNVE4(su);
if (su->op == OP_SULDP) {
- convertSurfaceFormat(su);
+ convertSurfaceFormat(su, NULL);
insertOOBSurfaceOpResult(su);
}
@@ -2421,7 +2441,7 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
processSurfaceCoordsNVC0(su);
if (su->op == OP_SULDP) {
- convertSurfaceFormat(su);
+ convertSurfaceFormat(su, NULL);
insertOOBSurfaceOpResult(su);
}
@@ -2463,14 +2483,16 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
}
}
-void
-NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
+TexInstruction *
+NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su, Instruction *ret[4])
{
const int slot = su->tex.r;
const int dim = su->tex.target.getDim();
- const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
+ const bool array = su->tex.target.isArray() || su->tex.target.isCube();
+ const int arg = dim + array;
Value *ind = su->getIndirectR();
Value *handle;
+ Instruction *pred = NULL, *pred2d = NULL;
int pos = 0;
bld.setPosition(su, false);
@@ -2489,67 +2511,153 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
assert(pos == 0);
break;
}
+
+ if (dim == 2 && !array) {
+ // This might be a 2d slice of a 3d texture, try to load the z
+ // coordinate in.
+ Value *v;
+ if (!su->tex.bindless)
+ v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
+ else
+ v = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), ind, bld.mkImm(11));
+ Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1));
+ pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, bld.mkImm(0), is_3d);
+
+ bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16));
+ su->moveSources(dim, 1);
+ su->setSrc(dim, v);
+ su->tex.target = nv50_ir::TEX_TARGET_3D;
+ pos++;
+ }
+
if (su->tex.bindless)
- handle = ind;
+ handle = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ind, bld.mkImm(2047));
else
handle = loadTexHandle(ind, slot + 32);
+
su->setSrc(arg + pos, handle);
// The address check doesn't make sense here. The format check could make
// sense but it's a bit of a pain.
- if (su->tex.bindless)
- return;
+ if (!su->tex.bindless) {
+ // prevent read fault when the image is not actually bound
+ pred =
+ bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, bld.mkImm(0),
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
+ if (su->op != OP_SUSTP && su->tex.format) {
+ const TexInstruction::ImgFormatDesc *format = su->tex.format;
+ int blockwidth = format->bits[0] + format->bits[1] +
+ format->bits[2] + format->bits[3];
+
+ assert(format->components != 0);
+ // make sure that the format doesn't mismatch when it's not FMT_NONE
+ bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
+ TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
+ loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
+ pred->getDef(0));
+ }
+ }
- // prevent read fault when the image is not actually bound
- CmpInstruction *pred =
- bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
- TYPE_U32, bld.mkImm(0),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
- if (su->op != OP_SUSTP && su->tex.format) {
- const TexInstruction::ImgFormatDesc *format = su->tex.format;
- int blockwidth = format->bits[0] + format->bits[1] +
- format->bits[2] + format->bits[3];
+ // Now we have "pred" which (optionally) contains whether to do the surface
+ // op at all, and a "pred2d" which indicates that, in case of doing the
+ // surface op, we have to create a 2d and 3d version, conditioned on pred2d.
+ TexInstruction *su2d = NULL;
+ if (pred2d) {
+ su2d = cloneForward(func, su)->asTex();
+ for (unsigned i = 0; su->defExists(i); ++i)
+ su2d->setDef(i, bld.getSSA());
+ su2d->moveSources(dim + 1, -1);
+ su2d->tex.target = nv50_ir::TEX_TARGET_2D;
+ }
+ if (pred2d && pred) {
+ Instruction *pred3d = bld.mkOp2(OP_AND, TYPE_U8,
+ bld.getSSA(1, FILE_PREDICATE),
+ pred->getDef(0), pred2d->getDef(0));
+ pred3d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
+ pred3d->src(1).mod = Modifier(NV50_IR_MOD_NOT);
+ su->setPredicate(CC_P, pred3d->getDef(0));
+ pred2d = bld.mkOp2(OP_AND, TYPE_U8, bld.getSSA(1, FILE_PREDICATE),
+ pred->getDef(0), pred2d->getDef(0));
+ pred2d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
+ } else if (pred) {
+ su->setPredicate(CC_NOT_P, pred->getDef(0));
+ } else if (pred2d) {
+ su->setPredicate(CC_NOT_P, pred2d->getDef(0));
+ }
+ if (su2d) {
+ su2d->setPredicate(CC_P, pred2d->getDef(0));
+ bld.insert(su2d);
+
+ // Create a UNION so that RA assigns the same registers
+ bld.setPosition(su, true);
+ for (unsigned i = 0; su->defExists(i); ++i) {
+ assert(i < 4);
- assert(format->components != 0);
- // make sure that the format doesn't mismatch when it's not FMT_NONE
- bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
- TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
- loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
- pred->getDef(0));
+ ValueDef &def = su->def(i);
+ ValueDef &def2 = su2d->def(i);
+ Instruction *mov = NULL;
+
+ if (pred) {
+ mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+ mov->setPredicate(CC_P, pred->getDef(0));
+ }
+
+ Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
+ bld.getSSA(),
+ NULL, def2.get());
+ def.replace(uni->getDef(0), false);
+ uni->setSrc(0, def.get());
+ if (mov)
+ uni->setSrc(2, mov->getDef(0));
+ }
+ } else if (pred) {
+ // Create a UNION so that RA assigns the same registers
+ bld.setPosition(su, true);
+ for (unsigned i = 0; su->defExists(i); ++i) {
+ assert(i < 4);
+
+ ValueDef &def = su->def(i);
+
+ Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+ mov->setPredicate(CC_P, pred->getDef(0));
+
+ Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
+ bld.getSSA(),
+ NULL, mov->getDef(0));
+ def.replace(uni->getDef(0), false);
+ uni->setSrc(0, def.get());
+ }
}
- su->setPredicate(CC_NOT_P, pred->getDef(0));
+
+ return su2d;
}
void
NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
{
- processSurfaceCoordsGM107(su);
+ // processSurfaceCoords also takes care of fixing up the outputs and
+ // union'ing them with 0 as necessary. Additionally it may create a second
+ // surface which needs some of the similar fixups.
+
+ Instruction *loaded[4] = {};
+ TexInstruction *su2 = processSurfaceCoordsGM107(su, loaded);
if (su->op == OP_SULDP) {
- convertSurfaceFormat(su);
- insertOOBSurfaceOpResult(su);
+ convertSurfaceFormat(su, loaded);
}
if (su->op == OP_SUREDP) {
- Value *def = su->getDef(0);
-
su->op = OP_SUREDB;
+ }
- // There may not be a predicate in the bindless case.
- if (su->getPredicate()) {
- su->setDef(0, bld.getSSA());
-
- bld.setPosition(su, true);
-
- // make sure to initialize dst value when the atomic operation is not
- // performed
- Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
-
- assert(su->cc == CC_NOT_P);
- mov->setPredicate(CC_P, su->getPredicate());
-
- bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
- }
+ // If we fixed up the type of the regular surface load instruction, we also
+ // have to fix up the copy.
+ if (su2) {
+ su2->op = su->op;
+ su2->dType = su->dType;
+ su2->sType = su->sType;
}
}