diff options
Diffstat (limited to 'src/gallium')
4 files changed, 202 insertions, 54 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 6eefe8f0025..e244bd0d610 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -122,6 +122,8 @@ private: void emitSAM(); void emitRAM(); + void emitPSETP(); + void emitMOV(); void emitS2R(); void emitCS2R(); @@ -690,6 +692,31 @@ CodeEmitterGM107::emitRAM() * predicate/cc ******************************************************************************/ +void +CodeEmitterGM107::emitPSETP() +{ + + emitInsn(0x50900000); + + switch (insn->op) { + case OP_AND: emitField(0x18, 3, 0); break; + case OP_OR: emitField(0x18, 3, 1); break; + case OP_XOR: emitField(0x18, 3, 2); break; + default: + assert(!"unexpected operation"); + break; + } + + // emitINV (0x2a); + emitPRED(0x27); // TODO: support 3-arg + emitINV (0x20, insn->src(1)); + emitPRED(0x1d, insn->src(1)); + emitINV (0x0f, insn->src(0)); + emitPRED(0x0c, insn->src(0)); + emitPRED(0x03, insn->def(0)); + emitPRED(0x00); +} + /******************************************************************************* * movement / conversion ******************************************************************************/ @@ -3557,7 +3584,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_AND: case OP_OR: case OP_XOR: - emitLOP(); + switch (insn->def(0).getFile()) { + case FILE_GPR: emitLOP(); break; + case FILE_PREDICATE: emitPSETP(); break; + default: + assert(!"invalid bool op"); + } break; case OP_NOT: emitNOT(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 1f702a987d8..a76d6c60cda 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless { uint32_t base = slot * NVC0_SU_INFO__STRIDE; + // We don't upload surface info for bindless for GM107+ + assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET); + if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); if (bindless) @@ -2204,7 +2207,7 @@ getDestType(const ImgType type) { } void -NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) +NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int width = format->bits[0] + format->bits[1] + @@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) if (width < 32) untypedDst[0] = bld.getSSA(); - for (int i = 0; i < 4; i++) { - typedDst[i] = su->getDef(i); + if (loaded && loaded[0]) { + for (int i = 0; i < 4; i++) { + if (loaded[i]) + typedDst[i] = loaded[i]->getDef(0); + } + } else { + for (int i = 0; i < 4; i++) { + typedDst[i] = su->getDef(i); + } } // Set the untyped dsts as the su's destinations - for (int i = 0; i < 4; i++) - su->setDef(i, untypedDst[i]); + if (loaded && loaded[0]) { + for (int i = 0; i < 4; i++) + if (loaded[i]) + loaded[i]->setDef(0, untypedDst[i]); + } else { + for (int i = 0; i < 4; i++) + su->setDef(i, untypedDst[i]); - bld.setPosition(su, true); + bld.setPosition(su, true); + } // Unpack each component into the typed dsts int bits = 0; for (int i = 0; i < 4; bits += format->bits[i], i++) { if (!typedDst[i]) continue; + + if (loaded && loaded[0]) + bld.setPosition(loaded[i], true); + if (i >= format->components) { if (format->type == FLOAT || format->type == UNORM || @@ -2308,7 +2328,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) processSurfaceCoordsNVE4(su); if (su->op == OP_SULDP) { - convertSurfaceFormat(su); + convertSurfaceFormat(su, NULL); insertOOBSurfaceOpResult(su); } @@ -2421,7 +2441,7 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) processSurfaceCoordsNVC0(su); if (su->op == OP_SULDP) { - convertSurfaceFormat(su); + convertSurfaceFormat(su, NULL); insertOOBSurfaceOpResult(su); } @@ -2463,14 +2483,16 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) } } -void -NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) +TexInstruction * +NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su, Instruction *ret[4]) { const int slot = su->tex.r; const int dim = su->tex.target.getDim(); - const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); + const bool array = su->tex.target.isArray() || su->tex.target.isCube(); + const int arg = dim + array; Value *ind = su->getIndirectR(); Value *handle; + Instruction *pred = NULL, *pred2d = NULL; int pos = 0; bld.setPosition(su, false); @@ -2489,67 +2511,153 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) assert(pos == 0); break; } + + if (dim == 2 && !array) { + // This might be a 2d slice of a 3d texture, try to load the z + // coordinate in. + Value *v; + if (!su->tex.bindless) + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); + else + v = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), ind, bld.mkImm(11)); + Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1)); + pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, bld.mkImm(0), is_3d); + + bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16)); + su->moveSources(dim, 1); + su->setSrc(dim, v); + su->tex.target = nv50_ir::TEX_TARGET_3D; + pos++; + } + if (su->tex.bindless) - handle = ind; + handle = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ind, bld.mkImm(2047)); else handle = loadTexHandle(ind, slot + 32); + su->setSrc(arg + pos, handle); // The address check doesn't make sense here. The format check could make // sense but it's a bit of a pain. - if (su->tex.bindless) - return; + if (!su->tex.bindless) { + // prevent read fault when the image is not actually bound + pred = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, bld.mkImm(0), + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); + if (su->op != OP_SUSTP && su->tex.format) { + const TexInstruction::ImgFormatDesc *format = su->tex.format; + int blockwidth = format->bits[0] + format->bits[1] + + format->bits[2] + format->bits[3]; + + assert(format->components != 0); + // make sure that the format doesn't mismatch when it's not FMT_NONE + bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), + TYPE_U32, bld.loadImm(NULL, blockwidth / 8), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), + pred->getDef(0)); + } + } - // prevent read fault when the image is not actually bound - CmpInstruction *pred = - bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), - TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); - if (su->op != OP_SUSTP && su->tex.format) { - const TexInstruction::ImgFormatDesc *format = su->tex.format; - int blockwidth = format->bits[0] + format->bits[1] + - format->bits[2] + format->bits[3]; + // Now we have "pred" which (optionally) contains whether to do the surface + // op at all, and a "pred2d" which indicates that, in case of doing the + // surface op, we have to create a 2d and 3d version, conditioned on pred2d. + TexInstruction *su2d = NULL; + if (pred2d) { + su2d = cloneForward(func, su)->asTex(); + for (unsigned i = 0; su->defExists(i); ++i) + su2d->setDef(i, bld.getSSA()); + su2d->moveSources(dim + 1, -1); + su2d->tex.target = nv50_ir::TEX_TARGET_2D; + } + if (pred2d && pred) { + Instruction *pred3d = bld.mkOp2(OP_AND, TYPE_U8, + bld.getSSA(1, FILE_PREDICATE), + pred->getDef(0), pred2d->getDef(0)); + pred3d->src(0).mod = Modifier(NV50_IR_MOD_NOT); + pred3d->src(1).mod = Modifier(NV50_IR_MOD_NOT); + su->setPredicate(CC_P, pred3d->getDef(0)); + pred2d = bld.mkOp2(OP_AND, TYPE_U8, bld.getSSA(1, FILE_PREDICATE), + pred->getDef(0), pred2d->getDef(0)); + pred2d->src(0).mod = Modifier(NV50_IR_MOD_NOT); + } else if (pred) { + su->setPredicate(CC_NOT_P, pred->getDef(0)); + } else if (pred2d) { + su->setPredicate(CC_NOT_P, pred2d->getDef(0)); + } + if (su2d) { + su2d->setPredicate(CC_P, pred2d->getDef(0)); + bld.insert(su2d); + + // Create a UNION so that RA assigns the same registers + bld.setPosition(su, true); + for (unsigned i = 0; su->defExists(i); ++i) { + assert(i < 4); - assert(format->components != 0); - // make sure that the format doesn't mismatch when it's not FMT_NONE - bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), - TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), - pred->getDef(0)); + ValueDef &def = su->def(i); + ValueDef &def2 = su2d->def(i); + Instruction *mov = NULL; + + if (pred) { + mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + mov->setPredicate(CC_P, pred->getDef(0)); + } + + Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32, + bld.getSSA(), + NULL, def2.get()); + def.replace(uni->getDef(0), false); + uni->setSrc(0, def.get()); + if (mov) + uni->setSrc(2, mov->getDef(0)); + } + } else if (pred) { + // Create a UNION so that RA assigns the same registers + bld.setPosition(su, true); + for (unsigned i = 0; su->defExists(i); ++i) { + assert(i < 4); + + ValueDef &def = su->def(i); + + Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + mov->setPredicate(CC_P, pred->getDef(0)); + + Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32, + bld.getSSA(), + NULL, mov->getDef(0)); + def.replace(uni->getDef(0), false); + uni->setSrc(0, def.get()); + } } - su->setPredicate(CC_NOT_P, pred->getDef(0)); + + return su2d; } void NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) { - processSurfaceCoordsGM107(su); + // processSurfaceCoords also takes care of fixing up the outputs and + // union'ing them with 0 as necessary. Additionally it may create a second + // surface which needs some of the similar fixups. + + Instruction *loaded[4] = {}; + TexInstruction *su2 = processSurfaceCoordsGM107(su, loaded); if (su->op == OP_SULDP) { - convertSurfaceFormat(su); - insertOOBSurfaceOpResult(su); + convertSurfaceFormat(su, loaded); } if (su->op == OP_SUREDP) { - Value *def = su->getDef(0); - su->op = OP_SUREDB; + } - // There may not be a predicate in the bindless case. - if (su->getPredicate()) { - su->setDef(0, bld.getSSA()); - - bld.setPosition(su, true); - - // make sure to initialize dst value when the atomic operation is not - // performed - Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); - - assert(su->cc == CC_NOT_P); - mov->setPredicate(CC_P, su->getPredicate()); - - bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0)); - } + // If we fixed up the type of the regular surface load instruction, we also + // have to fix up the copy. + if (su2) { + su2->op = su->op; + su2->dType = su->dType; + su2->sType = su->sType; } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 0ce2a4b80f8..b4c405a9ea5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -171,10 +171,10 @@ private: Value *loadMsInfo32(Value *ptr, uint32_t off); void adjustCoordinatesMS(TexInstruction *); - void processSurfaceCoordsGM107(TexInstruction *); + TexInstruction *processSurfaceCoordsGM107(TexInstruction *, Instruction *[4]); void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); - void convertSurfaceFormat(TexInstruction *); + void convertSurfaceFormat(TexInstruction *, Instruction **); void insertOOBSurfaceOpResult(TexInstruction *); Value *calculateSampleOffset(Value *sampleID); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index f62e508258b..4948a8f4cea 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -1433,7 +1433,15 @@ gm107_create_image_handle(struct pipe_context *pipe, nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); - return 0x100000000ULL | tic->id; + // Compute handle. This will include the TIC as well as some additional + // info regarding the bound 3d surface layer, if applicable. + uint64_t handle = 0x100000000ULL | tic->id; + struct nv04_resource *res = nv04_resource(view->resource); + if (res->base.target == PIPE_TEXTURE_3D) { + handle |= 1 << 11; + handle |= view->u.tex.first_layer << (11 + 16); + } + return handle; fail: FREE(tic); |