nvc0/ir: add fixup to deal with interpolateAtSample with non-MSAA

The spec calls for always using sample 0 in this case, whereas we can do undefined things for invalid sample IDs in the MSAA case.

Fixes dEQP-GLES31.functional.shaders.multisample_interpolation.interpolate_at_sample.non_multisample_buffer.sample_n_*

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8574>
author: Ilia Mirkin <imirkin@alum.mit.edu> 2021-01-19 05:17:45 -0500
committer: Ilia Mirkin <imirkin@alum.mit.edu> 2021-01-21 15:55:34 -0500
commit: 245a696741d2a4cd5f2aade38c2194e3030d659b (patch)
tree: f8aba9f001dea55a03995a0d703fdbf86a7df37f /src/gallium/drivers/nouveau
parent: 0773cd33c2bd14605eee471dd0dcc5306dd41cd4 (diff)
13 files changed, 84 insertions, 22 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index c36e990735d..bacf95530da 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -224,7 +224,7 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
 extern void
 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
                      bool force_per_sample, bool flatshade,
-                     uint8_t alphatest);
+                     uint8_t alphatest, bool msaa);
 
 /* obtain code that will be shared among programs */
 extern void nv50_ir_get_target_library(uint32_t chipset,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index e651d7fdcb0..8116fb58f7d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1213,7 +1213,16 @@ void
 gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
    int loc = entry->loc;
-   if (data.force_persample_interp)
+   bool val = false;
+   switch (entry->ipa) {
+   case 0:
+      val = data.force_persample_interp;
+      break;
+   case 1:
+      val = data.msaa;
+      break;
+   }
+   if (val)
       code[loc + 1] |= 1 << 13;
    else
       code[loc + 1] &= ~(1 << 13);
@@ -1226,8 +1235,8 @@ void CodeEmitterGK110::emitSELP(const Instruction *i)
    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
       code[1] |= 1 << 13;
 
-   if (i->subOp == 1) {
-      addInterp(0, 0, gk110_selpFlip);
+   if (i->subOp >= 1) {
+      addInterp(i->subOp - 1, 0, gk110_selpFlip);
    }
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index f7a2307c9f4..56bc9fc3cef 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -953,7 +953,16 @@ void
 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
    int loc = entry->loc;
-   if (data.force_persample_interp)
+   bool val = false;
+   switch (entry->ipa) {
+   case 0:
+      val = data.force_persample_interp;
+      break;
+   case 1:
+      val = data.msaa;
+      break;
+   }
+   if (val)
       code[loc + 1] |= 1 << 10;
    else
       code[loc + 1] &= ~(1 << 10);
@@ -985,8 +994,8 @@ CodeEmitterGM107::emitSEL()
    emitGPR (0x08, insn->src(0));
    emitGPR (0x00, insn->def(0));
 
-   if (insn->subOp == 1) {
-      addInterp(0, 0, gm107_selpFlip);
+   if (insn->subOp >= 1) {
+      addInterp(insn->subOp - 1, 0, gm107_selpFlip);
    }
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
index 8da45ad517d..415fbf385ac 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
@@ -354,7 +354,16 @@ void
 gv100_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
    int loc = entry->loc;
-   if (data.force_persample_interp)
+   bool val = false;
+   switch (entry->ipa) {
+   case 0:
+      val = data.force_persample_interp;
+      break;
+   case 1:
+      val = data.msaa;
+      break;
+   }
+   if (val)
       code[loc + 2] |= 1 << 26;
    else
       code[loc + 2] &= ~(1 << 26);
@@ -366,8 +375,8 @@ CodeEmitterGV100::emitSEL()
    emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
    emitNOT  (90, insn->src(2));
    emitPRED (87, insn->src(2));
-   if (insn->subOp == 1)
-      addInterp(0, 0, gv100_selpFlip);
+   if (insn->subOp >= 1)
+      addInterp(insn->subOp - 1, 0, gv100_selpFlip);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 0a82c6de20d..08863868b7a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1259,7 +1259,16 @@ void
 nvc0_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 {
    int loc = entry->loc;
-   if (data.force_persample_interp)
+   bool val = false;
+   switch (entry->ipa) {
+   case 0:
+      val = data.force_persample_interp;
+      break;
+   case 1:
+      val = data.msaa;
+      break;
+   }
+   if (val)
       code[loc + 1] |= 1 << 20;
    else
       code[loc + 1] &= ~(1 << 20);
@@ -1272,8 +1281,8 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i)
    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
       code[1] |= 1 << 20;
 
-   if (i->subOp == 1) {
-      addInterp(0, 0, nvc0_selpFlip);
+   if (i->subOp >= 1) {
+      addInterp(i->subOp - 1, 0, nvc0_selpFlip);
    }
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 417800585da..b857d3cd6a9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -1809,7 +1809,10 @@ Converter::visit(nir_intrinsic_instr *insn)
          mode = NV50_IR_INTERP_DEFAULT;
       } else if (op == nir_intrinsic_load_barycentric_at_sample) {
          info_out->prop.fp.readsSampleLocations = true;
-         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], getSrc(&insn->src[0], 0))->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
+         Value *sample = getSSA();
+         mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))
+            ->subOp = 2;
+         mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
          mode = NV50_IR_INTERP_OFFSET;
       } else {
          unreachable("all intrinsics already handled above");
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index d4881af6281..19f070f44bb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -3047,11 +3047,19 @@ Converter::handleINTERP(Value *dst[4])
    case TGSI_OPCODE_INTERP_CENTROID:
       mode |= NV50_IR_INTERP_CENTROID;
       break;
-   case TGSI_OPCODE_INTERP_SAMPLE:
-      insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
+   case TGSI_OPCODE_INTERP_SAMPLE: {
+      // When using a non-MS buffer, we're supposed to always use the center
+      // (i.e. sample 0). This adds a SELP which will always be true or false,
+      // depending on a data fixup.
+      Value *sample = getScratch();
+      mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), fetchSrc(1, 0), mkImm(0))
+         ->subOp = 2;
+
+      insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), sample);
       insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
       mode |= NV50_IR_INTERP_OFFSET;
       break;
+   }
    case TGSI_OPCODE_INTERP_OFFSET: {
       // The input in src1.xy is float, but we need a single 32-bit value
       // where the upper and lower 16 bits are encoded in S0.12 format. We need
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5d03f8f6055..fdaa75ea560 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -495,7 +495,7 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
 void
 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
                      bool force_persample_interp, bool flatshade,
-                     uint8_t alphatest)
+                     uint8_t alphatest, bool msaa)
 {
    nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
       fixupData);
@@ -503,7 +503,8 @@ nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
    // force_persample_interp: all non-flat -> per-sample
    // flatshade: all color -> flat
    // alphatest: PIPE_FUNC_* to use with alphatest
-   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
+   // msaa: false = sample id -> 0 for interpolateAtSample
+   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);
    for (unsigned i = 0; i < info->count; ++i)
       info->entry[i].apply(&info->entry[i], code, data);
 }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index db8ae0ae99e..ce84ea367f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -59,11 +59,12 @@ struct RelocInfo
 };
 
 struct FixupData {
-   FixupData(bool force, bool flat, uint8_t alphatest) :
-      force_persample_interp(force), flatshade(flat), alphatest(alphatest) {}
+   FixupData(bool force, bool flat, uint8_t alphatest, bool msaa) :
+      force_persample_interp(force), flatshade(flat), alphatest(alphatest), msaa(msaa) {}
    bool force_persample_interp;
    bool flatshade;
    uint8_t alphatest;
+   bool msaa;
 };
 
 struct FixupEntry;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 21ffd951c26..8c14a6531ec 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -510,7 +510,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
       nv50_ir_apply_fixups(prog->interps, prog->code,
                            prog->fp.force_persample_interp,
                            false /* flatshade */,
-                           prog->fp.alphatest - 1);
+                           prog->fp.alphatest - 1,
+                           false /* msaa */);
 
    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                        (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 98dae6c703e..d2b37d1aaa1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -844,7 +844,8 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
       nv50_ir_apply_fixups(prog->fixups, prog->code,
                            prog->fp.force_persample_interp,
                            prog->fp.flatshade,
-                           0 /* alphatest */);
+                           0 /* alphatest */,
+                           prog->fp.msaa);
       for (int i = 0; i < 2; i++) {
          unsigned mask = prog->fp.color_interp[i] >> 4;
          unsigned interp = prog->fp.color_interp[i] & 3;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 8f209d75ca1..74996fbc867 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -54,6 +54,7 @@ struct nvc0_program {
       bool flatshade;
       bool reads_framebuffer;
       bool post_depth_coverage;
+      bool msaa;
    } fp;
    struct {
       uint32_t tess_mode; /* ~0 if defined by the other stage */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 5e2a6c0566e..a8203b5c6ce 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -118,6 +118,16 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
       fp->fp.force_persample_interp = rast->force_persample_interp;
    }
 
+   if (fp->fp.msaa != rast->multisample) {
+      /* Force the program to be reuploaded, which will trigger interp fixups
+       * to get applied
+       */
+      if (fp->mem)
+         nouveau_heap_free(&fp->mem);
+
+      fp->fp.msaa = rast->multisample;
+   }
+
    /* Shade model works well enough when both colors follow it. However if one
     * (or both) is explicitly set, then we have to go the patching route.
     */
author	Ilia Mirkin <imirkin@alum.mit.edu>	2021-01-19 05:17:45 -0500
committer	Ilia Mirkin <imirkin@alum.mit.edu>	2021-01-21 15:55:34 -0500
commit	245a696741d2a4cd5f2aade38c2194e3030d659b (patch)
tree	f8aba9f001dea55a03995a0d703fdbf86a7df37f /src/gallium/drivers/nouveau
parent	0773cd33c2bd14605eee471dd0dcc5306dd41cd4 (diff)