diff options
author | Ilia Mirkin <imirkin@alum.mit.edu> | 2017-06-24 00:44:35 -0400 |
---|---|---|
committer | Ilia Mirkin <imirkin@alum.mit.edu> | 2018-01-07 11:40:35 -0500 |
commit | 23a6e8d8ff5effa7b88bd77c3bf396879a2ece0c (patch) | |
tree | da4107ad5ff1078b4468e86fcdc3854f08787926 /src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | |
parent | 8eb1214755366fc34ed15a7e3dec48d4f0d65f10 (diff) |
nvc0: add bindless image support for kepler
A part of the driver constbuf area is allocated for bindless images. Any
update requires uploading to all driver constbufs. This also extends the
driver constbuf to 64KB, up from 2KB.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 57 |
1 files changed, 31 insertions, 26 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 4e65d449ebf..e07f57e782d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1742,19 +1742,23 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) #define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) inline Value * -NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off) +NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless) { uint32_t base = slot * NVC0_SU_INFO__STRIDE; if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); - ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); + if (bindless) + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(511)); + else + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(6)); base = 0; } off += base; - return loadResInfo32(ptr, off, prog->driver->io.suInfoBase); + return loadResInfo32(ptr, off, bindless ? prog->driver->io.bindlessBase : + prog->driver->io.suInfoBase); } static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) @@ -1800,7 +1804,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) } else { offset = NVC0_SU_INFO_SIZE(c); } - bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset)); + bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, slot, offset, suq->tex.bindless)); if (c == 2 && suq->tex.target.isCube()) bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1), bld.loadImm(NULL, 6)); @@ -1808,8 +1812,8 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 1) { if (suq->tex.target.isMS()) { - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless); Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, ms_y); bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), ms); } else { @@ -1842,8 +1846,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0)); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1)); + Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); + Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); @@ -1903,9 +1907,9 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) src[c] = bld.getScratch(); if (c == 0 && raw) - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_RAW_X, su->tex.bindless); else - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc)); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(dimc), su->tex.bindless); bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) ->subOp = getSuClampSubOp(su, dimc); } @@ -1927,16 +1931,16 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); } else if (dim == 3) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l } else { assert(dim == 2); - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l @@ -1947,7 +1951,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) if (raw) { bf = src[0]; } else { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); } @@ -1964,7 +1968,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) case 2: z = off; if (!su->tex.target.isArray() && !su->tex.target.isCube()) { - z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C); + z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); subOp = NV50_IR_SUBOP_SUBFM_3D; } break; @@ -1979,7 +1983,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // part 2 - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless); if (su->tex.target == TEX_TARGET_BUFFER) { eau = v; @@ -1988,7 +1992,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); if (dim == 1) bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 @@ -2028,7 +2032,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) // let's just set it 0 for raw access and hope it works v = raw ? - bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT); + bld.mkImm(0) : loadSuInfo32(ind, slot, NVC0_SU_INFO_FMT, su->tex.bindless); // get rid of old coordinate sources, make space for fmt info and predicate su->moveSources(arg, 3 - arg); @@ -2036,12 +2040,13 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) su->setSrc(0, addr); su->setSrc(1, v); su->setSrc(2, pred); + su->setIndirectR(NULL); // prevent read fault when the image is not actually bound CmpInstruction *pred1 = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; @@ -2052,7 +2057,7 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) assert(format->components != 0); bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred1->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred1->getDef(0)); } su->setPredicate(CC_NOT_P, pred1->getDef(0)); @@ -2247,13 +2252,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // calculate pixel offset if (su->op == OP_SULDP || su->op == OP_SUREDP) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless); su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v)); } // add array layer offset if (su->tex.target.isArray() || su->tex.target.isCube()) { - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY); + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); assert(dim > 1); su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v)); } @@ -2262,7 +2267,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2272,7 +2277,7 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); @@ -2361,7 +2366,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) CmpInstruction *pred = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), TYPE_U32, bld.mkImm(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR)); + loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless)); if (su->op != OP_SUSTP && su->tex.format) { const TexInstruction::ImgFormatDesc *format = su->tex.format; int blockwidth = format->bits[0] + format->bits[1] + @@ -2371,7 +2376,7 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su) // make sure that the format doesn't mismatch when it's not FMT_NONE bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0), TYPE_U32, bld.loadImm(NULL, blockwidth / 8), - loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE), + loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless), pred->getDef(0)); } su->setPredicate(CC_NOT_P, pred->getDef(0)); |