diff options
6 files changed, 33 insertions, 52 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index d4bed4e7720..1796334b918 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1254,14 +1254,12 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) info->prop.cp.numThreads[1] = info->prop.cp.numThreads[2] = 1; } - info->io.pointSize = 0xff; info->io.instanceId = 0xff; info->io.vertexId = 0xff; info->io.edgeFlagIn = 0xff; info->io.edgeFlagOut = 0xff; info->io.fragDepth = 0xff; info->io.sampleMask = 0xff; - info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; } int diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index a0b545fd24d..4fb89fa3f5e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -1341,6 +1341,7 @@ public: int maxGPR; bool fp64; + bool persampleInvocation; MemoryPool mem_Instruction; MemoryPool mem_CmpInstruction; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 63ea7f5e7e8..53927893752 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -91,7 +91,6 @@ struct nv50_ir_prog_info struct { int16_t maxGPR; /* may be -1 if none used */ - int16_t maxOutput; uint32_t tlsSpace; /* required local memory per thread */ uint32_t smemSize; /* required shared memory per block */ uint32_t *code; @@ -113,17 +112,8 @@ struct nv50_ir_prog_info uint8_t numPatchConstants; /* also included in numInputs/numOutputs */ uint8_t numSysVals; - struct { - uint32_t *buf; /* for IMMEDIATE_ARRAY */ - uint16_t bufSize; /* size of immediate array */ - uint16_t count; /* count of inline immediates */ - uint32_t *data; /* inline immediate data */ - uint8_t *type; /* for each vec4 (128 bit) */ - } immd; - union { struct { - uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */ bool usesDrawParameters; } vp; struct { @@ -134,7 +124,6 @@ struct nv50_ir_prog_info uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */ } tp; struct { - uint8_t inputPrim; uint8_t outputPrim; unsigned instanceCount; unsigned maxVertices; @@ -146,14 +135,12 @@ struct nv50_ir_prog_info bool postDepthCoverage; bool separateFragData; bool usesDiscard; - bool persampleInvocation; bool usesSampleMaskIn; bool readsFramebuffer; bool readsSampleLocations; } fp; struct { uint32_t inputOffset; /* base address for user args */ - uint32_t sharedOffset; /* reserved space in s[] */ uint32_t gridInfoBase; /* base address for NTID,NCTAID */ uint16_t numThreads[3]; /* max number of threads */ } cp; @@ -169,7 +156,6 @@ struct nv50_ir_prog_info uint16_t ucpBase; /* base address for UCPs */ uint16_t drawInfoBase; /* base address for draw parameters */ uint16_t alphaRefBase; /* base address for alpha test values */ - uint8_t pointSize; /* output index for PointSize */ uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ uint8_t edgeFlagIn; @@ -177,7 +163,6 @@ struct nv50_ir_prog_info int8_t viewportId; /* output index of ViewportIndex */ uint8_t fragDepth; /* output index of FragDepth */ uint8_t sampleMask; /* output index of SampleMask */ - uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ bool mul_zero_wins; /* program wants for x*0 = 0 */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index a972b51b623..c5b11726cec 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -1260,7 +1260,7 @@ Converter::parseNIR() break; case Program::TYPE_FRAGMENT: info->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests; - info->prop.fp.persampleInvocation = + prog->persampleInvocation = (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_ID) || (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; @@ -1271,7 +1271,6 @@ Converter::parseNIR() !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); break; case Program::TYPE_GEOMETRY: - info->prop.gp.inputPrim = nir->info.gs.input_primitive; info->prop.gp.instanceCount = nir->info.gs.invocations; info->prop.gp.maxVertices = nir->info.gs.vertices_out; info->prop.gp.outputPrim = nir->info.gs.output_primitive; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3fd76f64de0..f0bf2932e3b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -114,12 +114,12 @@ public: return SrcRegister(fsr->Indirect); } - uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const + uint32_t getValueU32(int c, const uint32_t *data) const { assert(reg.File == TGSI_FILE_IMMEDIATE); assert(!reg.Absolute); assert(!reg.Negate); - return info->immd.data[reg.Index * 4 + getSwizzle(c)]; + return data[reg.Index * 4 + getSwizzle(c)]; } private: @@ -986,7 +986,7 @@ bool Instruction::checkDstSrcAliasing() const class Source { public: - Source(struct nv50_ir_prog_info *); + Source(struct nv50_ir_prog_info *, nv50_ir::Program *); ~Source(); public: @@ -1034,7 +1034,13 @@ public: std::vector<bool> bufferAtomics; + struct { + uint16_t count; /* count of inline immediates */ + uint32_t *data; /* inline immediate data */ + } immd; + private: + nv50_ir::Program *prog; int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); bool scanInstruction(const struct tgsi_full_instruction *); @@ -1047,12 +1053,16 @@ private: inline bool isEdgeFlagPassthrough(const Instruction&) const; }; -Source::Source(struct nv50_ir_prog_info *prog) : info(prog) +Source::Source(struct nv50_ir_prog_info *info, nv50_ir::Program *prog) +: info(info), prog(prog) { tokens = (const struct tgsi_token *)info->bin.source; - if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) + if (info->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); + + immd.count = 0; + immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); } Source::~Source() @@ -1060,10 +1070,8 @@ Source::~Source() if (insns) FREE(insns); - if (info->immd.data) - FREE(info->immd.data); - if (info->immd.type) - FREE(info->immd.type); + if (immd.data) + FREE(immd.data); } bool Source::scanSource() @@ -1086,8 +1094,6 @@ bool Source::scanSource() memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); - info->immd.bufSize = 0; - info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; @@ -1102,9 +1108,6 @@ bool Source::scanSource() info->io.viewportId = -1; - info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); - info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); - tgsi_parse_init(&parse, tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); @@ -1164,9 +1167,6 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_GS_OUTPUT_PRIM: info->prop.gp.outputPrim = prop->u[0].Data; break; - case TGSI_PROPERTY_GS_INPUT_PRIM: - info->prop.gp.inputPrim = prop->u[0].Data; - break; case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: info->prop.gp.maxVertices = prop->u[0].Data; break; @@ -1240,14 +1240,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop) void Source::scanImmediate(const struct tgsi_full_immediate *imm) { - const unsigned n = info->immd.count++; + const unsigned n = immd.count++; assert(n < scan.immediate_count); for (int c = 0; c < 4; ++c) - info->immd.data[n * 4 + c] = imm->u[c].Uint; - - info->immd.type[n] = imm->Immediate.DataType; + immd.data[n * 4 + c] = imm->u[c].Uint; } int Source::inferSysValDirection(unsigned sn) const @@ -1388,7 +1386,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) break; case TGSI_SEMANTIC_SAMPLEID: case TGSI_SEMANTIC_SAMPLEPOS: - info->prop.fp.persampleInvocation = true; + prog->persampleInvocation = true; break; case TGSI_SEMANTIC_SAMPLEMASK: info->prop.fp.usesSampleMaskIn = true; @@ -2020,7 +2018,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) switch (src.getFile()) { case TGSI_FILE_IMMEDIATE: assert(!ptr); - return loadImm(NULL, info->immd.data[idx * 4 + swz]); + return loadImm(NULL, code->immd.data[idx * 4 + swz]); case TGSI_FILE_CONSTANT: return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); case TGSI_FILE_INPUT: @@ -2376,7 +2374,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) tgsi.getOpcode() == TGSI_OPCODE_TXP)) texi->tex.levelZero = true; if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) - texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); + texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, code->immd.data); texi->tex.useOffsets = tgsi.getNumTexOffsets(); for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { @@ -2637,7 +2635,7 @@ Converter::handleLOAD(Value *dst0[4]) if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, - tgsi.getSrc(1).getValueU32(0, info) + + tgsi.getSrc(1).getValueU32(0, code->immd.data) + src0_component_offset); } else { // yzw are ignored for buffers @@ -2784,7 +2782,7 @@ Converter::handleSTORE() if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, - tgsi.getSrc(0).getValueU32(0, info) + 4 * c); + tgsi.getSrc(0).getValueU32(0, code->immd.data) + 4 * c); } else { // yzw are ignored for buffers off = fetchSrc(0, 0); @@ -2902,7 +2900,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, - tgsi.getSrc(1).getValueU32(c, info)); + tgsi.getSrc(1).getValueU32(c, code->immd.data)); else sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); if (subOp == NV50_IR_SUBOP_ATOM_CAS) @@ -3537,7 +3535,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ENDPRIM: { // get vertex stream (must be immediate) - unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); + unsigned int stream = tgsi.getSrc(0).getValueU32(0, code->immd.data); if (stream && op == OP_RESTART) break; if (info->prop.gp.maxVertices == 0) @@ -3732,7 +3730,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) break; case TGSI_OPCODE_MEMBAR: { - uint32_t level = tgsi.getSrc(0).getValueU32(0, info); + uint32_t level = tgsi.getSrc(0).getValueU32(0, code->immd.data); geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); geni->fixed = 1; if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED))) @@ -3794,8 +3792,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) val0 = getScratch(); if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE && tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) { - loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) | - tgsi.getSrc(1).getValueU32(c, info)); + loadImm(val0, (tgsi.getSrc(2).getValueU32(c, code->immd.data) << 8) | + tgsi.getSrc(1).getValueU32(c, code->immd.data)); } else { src1 = fetchSrc(1, c); src2 = fetchSrc(2, c); @@ -4338,7 +4336,7 @@ namespace nv50_ir { bool Program::makeFromTGSI(struct nv50_ir_prog_info *info) { - tgsi::Source src(info); + tgsi::Source src(info, this); if (!src.scanSource()) return false; tlsSize = info->bin.tlsSpace; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 067f9abaca8..1bcfb054ffa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2965,7 +2965,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i) bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0), bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), bld.loadImm(NULL, 1), sampleid->getDef(0))); - if (prog->driver->prop.fp.persampleInvocation) { + if (prog->persampleInvocation) { bld.mkMov(i->getDef(0), masked); } else { bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked, |