diff options
Diffstat (limited to 'src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp')
-rw-r--r-- | src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | 74 |
1 files changed, 72 insertions, 2 deletions
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index bd33fbfac5c..318d345efdb 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp | |||
@@ -117,6 +117,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) | |||
117 | 117 | ||
118 | class NVC0LegalizePostRA : public Pass | 118 | class NVC0LegalizePostRA : public Pass |
119 | { | 119 | { |
120 | public: | ||
121 | NVC0LegalizePostRA(const Program *); | ||
122 | |||
120 | private: | 123 | private: |
121 | virtual bool visit(Function *); | 124 | virtual bool visit(Function *); |
122 | virtual bool visit(BasicBlock *); | 125 | virtual bool visit(BasicBlock *); |
@@ -127,8 +130,15 @@ private: | |||
127 | void propagateJoin(BasicBlock *); | 130 | void propagateJoin(BasicBlock *); |
128 | 131 | ||
129 | LValue *r63; | 132 | LValue *r63; |
133 | |||
134 | const bool needTexBar; | ||
130 | }; | 135 | }; |
131 | 136 | ||
137 | NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) | ||
138 | : needTexBar(prog->getTarget()->getChipset() >= 0xe0) | ||
139 | { | ||
140 | } | ||
141 | |||
132 | bool | 142 | bool |
133 | NVC0LegalizePostRA::visit(Function *fn) | 143 | NVC0LegalizePostRA::visit(Function *fn) |
134 | { | 144 | { |
@@ -225,6 +235,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) | |||
225 | } else | 235 | } else |
226 | if (i->isNop()) { | 236 | if (i->isNop()) { |
227 | bb->remove(i); | 237 | bb->remove(i); |
238 | } else | ||
239 | if (needTexBar && isTextureOp(i->op)) { | ||
240 | Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); | ||
241 | bar->fixed = 1; | ||
242 | bar->subOp = 0; | ||
243 | bb->insertAfter(i, bar); | ||
228 | } else { | 244 | } else { |
229 | if (i->op != OP_MOV && i->op != OP_PFETCH) | 245 | if (i->op != OP_MOV && i->op != OP_PFETCH) |
230 | replaceZero(i); | 246 | replaceZero(i); |
@@ -310,7 +326,61 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) | |||
310 | const int dim = i->tex.target.getDim() + i->tex.target.isCube(); | 326 | const int dim = i->tex.target.getDim() + i->tex.target.isCube(); |
311 | const int arg = i->tex.target.getArgCount(); | 327 | const int arg = i->tex.target.getArgCount(); |
312 | 328 | ||
313 | // generate and move the tsc/tic/array source to the front | 329 | if (prog->getTarget()->getChipset() >= 0xe0) { |
330 | if (i->tex.r == i->tex.s) { | ||
331 | i->tex.r += 8; // NOTE: offset should probably be a driver option | ||
332 | i->tex.s = 0; // only a single cX[] value possible here | ||
333 | } else { | ||
334 | // TODO: extract handles and use register to select TIC/TSC entries | ||
335 | } | ||
336 | if (i->tex.target.isArray()) { | ||
337 | LValue *layer = new_LValue(func, FILE_GPR); | ||
338 | Value *src = i->getSrc(arg - 1); | ||
339 | const int sat = (i->op == OP_TXF) ? 1 : 0; | ||
340 | DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; | ||
341 | bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; | ||
342 | for (int s = dim; s >= 1; --s) | ||
343 | i->setSrc(s, i->getSrc(s - 1)); | ||
344 | i->setSrc(0, layer); | ||
345 | } | ||
346 | if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { | ||
347 | Value *tmp[2]; | ||
348 | Symbol *bind; | ||
349 | Value *rRel = i->getIndirectR(); | ||
350 | Value *sRel = i->getIndirectS(); | ||
351 | Value *shCnt = bld.loadImm(NULL, 2); | ||
352 | |||
353 | if (rRel) { | ||
354 | tmp[0] = bld.getScratch(); | ||
355 | bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4); | ||
356 | bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt); | ||
357 | tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); | ||
358 | bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], | ||
359 | bld.loadImm(tmp[0], 0x00ffffffu)); | ||
360 | rRel = tmp[0]; | ||
361 | i->setSrc(i->tex.rIndirectSrc, NULL); | ||
362 | } | ||
363 | if (sRel) { | ||
364 | tmp[0] = bld.getScratch(); | ||
365 | bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4); | ||
366 | bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt); | ||
367 | tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); | ||
368 | bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], | ||
369 | bld.loadImm(tmp[0], 0xff000000u)); | ||
370 | sRel = tmp[0]; | ||
371 | i->setSrc(i->tex.sIndirectSrc, NULL); | ||
372 | } | ||
373 | bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel); | ||
374 | |||
375 | int min = i->tex.rIndirectSrc; | ||
376 | if (min < 0 || min > i->tex.sIndirectSrc) | ||
377 | min = i->tex.sIndirectSrc; | ||
378 | for (int s = min; s >= 1; --s) | ||
379 | i->setSrc(s, i->getSrc(s - 1)); | ||
380 | i->setSrc(0, rRel); | ||
381 | } | ||
382 | } else | ||
383 | // (nvc0) generate and move the tsc/tic/array source to the front | ||
314 | if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { | 384 | if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { |
315 | LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa | 385 | LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa |
316 | 386 | ||
@@ -717,7 +787,7 @@ TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const | |||
717 | return pass.run(prog, false, true); | 787 | return pass.run(prog, false, true); |
718 | } else | 788 | } else |
719 | if (stage == CG_STAGE_POST_RA) { | 789 | if (stage == CG_STAGE_POST_RA) { |
720 | NVC0LegalizePostRA pass; | 790 | NVC0LegalizePostRA pass(prog); |
721 | return pass.run(prog, false, true); | 791 | return pass.run(prog, false, true); |
722 | } else | 792 | } else |
723 | if (stage == CG_STAGE_SSA) { | 793 | if (stage == CG_STAGE_SSA) { |