summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu> 2021-11-29 01:44:32 -0500
committer: Marge Bot <emma+marge@anholt.net> 2022-03-03 18:26:43 +0000
commit: 539fae796a547a174c1bb92951dbbb132493a01d (patch)
tree: 4678460f856ba72546328c0f6575688541be0866
parent: 96211adf771da5211b9d5d8178f1cee0626a0792 (diff)
freedreno/a4xx: fix integer tg4
Something is slightly off in the integer values returned. It passes many tests without the fixup, but the dEQP-GLES31 tests complain. The blob ends up doing 3x gathers, and selects between them based on getinfo results.

Since we already have a per-sampler key with some spare bits, just stick the bit-size info in there. And we can derive signedness from the associated type info.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14670>
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 42
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_emit.c 52
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_texture.c 22
3 files changed, 113 insertions, 3 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index fc216505c7f..ad1a275685b 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2914,7 +2914,6 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
bool tg4_swizzle_fixup = false;
if (tex->op == nir_texop_tg4 && ctx->compiler->gen == 4 &&
ctx->sampler_swizzles[tex->texture_index] != 0x688 /* rgba */) {
- /* XXX fix-up ASTC alpha as well? */
uint16_t swizzles = ctx->sampler_swizzles[tex->texture_index];
uint16_t swizzle = (swizzles >> (tex->component * 3)) & 7;
if (swizzle > 3) {
@@ -2954,10 +2953,47 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex)
sam = emit_sam(ctx, opc, info, type, MASK(ncomp), col0, col1);
}
- if (tg4_swizzle_fixup)
+ if (tg4_swizzle_fixup) {
+ /* TODO: fix-up for ASTC when alpha is selected? */
array_insert(ctx->ir, ctx->ir->tg4, sam);
- if ((ctx->astc_srgb & (1 << tex->texture_index)) &&
+ ir3_split_dest(b, dst, sam, 0, 4);
+
+ uint8_t tex_bits = ctx->sampler_swizzles[tex->texture_index] >> 12;
+ if (!type_float(type) && tex_bits != 3 /* 32bpp */ &&
+ tex_bits != 0 /* key unset */) {
+ uint8_t bits = 0;
+ switch (tex_bits) {
+ case 1: /* 8bpp */
+ bits = 8;
+ break;
+ case 2: /* 16bpp */
+ bits = 16;
+ break;
+ case 4: /* 10bpp or 2bpp for alpha */
+ if (opc == OPC_GATHER4A)
+ bits = 2;
+ else
+ bits = 10;
+ break;
+ default:
+ debug_assert(0);
+ }
+
+ sam->cat5.type = TYPE_F32;
+ for (int i = 0; i < 4; i++) {
+ /* scale and offset the unorm data */
+ dst[i] = ir3_MAD_F32(b, dst[i], 0, create_immed(b, fui((1 << bits) - 1)), 0, create_immed(b, fui(0.5f)), 0);
+ /* convert the scaled value to integer */
+ dst[i] = ir3_COV(b, dst[i], TYPE_F32, TYPE_U32);
+ /* sign extend for signed values */
+ if (type == TYPE_S32) {
+ dst[i] = ir3_SHL_B(b, dst[i], 0, create_immed(b, 32 - bits), 0);
+ dst[i] = ir3_ASHR_B(b, dst[i], 0, create_immed(b, 32 - bits), 0);
+ }
+ }
+ }
+ } else if ((ctx->astc_srgb & (1 << tex->texture_index)) &&
tex->op != nir_texop_tg4 && /* leave out tg4, unless it's on alpha? */
!nir_tex_instr_is_query(tex)) {
assert(opc != OPC_META_TEX_PREFETCH);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index ec43cffc293..73c4fd6bc87 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -262,6 +262,58 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_Z) |
A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_W);
+ /* Remap integer formats as unorm (will be fixed up in shader) */
+ if (util_format_is_pure_integer(view->base.format)) {
+ texconst0 &= ~A4XX_TEX_CONST_0_FMT__MASK;
+ switch (fd4_pipe2tex(view->base.format)) {
+ case TFMT4_8_8_8_8_UINT:
+ case TFMT4_8_8_8_8_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_8_8_8_UNORM);
+ break;
+ case TFMT4_8_8_UINT:
+ case TFMT4_8_8_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_8_UNORM);
+ break;
+ case TFMT4_8_UINT:
+ case TFMT4_8_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_8_UNORM);
+ break;
+
+ case TFMT4_16_16_16_16_UINT:
+ case TFMT4_16_16_16_16_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_16_16_16_UNORM);
+ break;
+ case TFMT4_16_16_UINT:
+ case TFMT4_16_16_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_16_UNORM);
+ break;
+ case TFMT4_16_UINT:
+ case TFMT4_16_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_16_UNORM);
+ break;
+
+ case TFMT4_32_32_32_32_UINT:
+ case TFMT4_32_32_32_32_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_32_32_32_FLOAT);
+ break;
+ case TFMT4_32_32_UINT:
+ case TFMT4_32_32_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_32_FLOAT);
+ break;
+ case TFMT4_32_UINT:
+ case TFMT4_32_SINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_32_FLOAT);
+ break;
+
+ case TFMT4_10_10_10_2_UINT:
+ texconst0 |= A4XX_TEX_CONST_0_FMT(TFMT4_10_10_10_2_UNORM);
+ break;
+
+ default:
+ debug_assert(0);
+ }
+ }
+
OUT_RING(ring, texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index b03fc9e8d97..98e3811265e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -258,6 +258,28 @@ fd4_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader,
if (view->astc_srgb)
astc_srgb |= (1 << (start + i));
sampler_swizzles[start + i] = view->swizzle >> 4;
+
+ const struct util_format_description *desc =
+ util_format_description(view->base.format);
+ int c = util_format_get_first_non_void_channel(desc->format);
+ if (c >= 0 && desc->channel[c].pure_integer) {
+ switch (desc->channel[c].size) {
+ case 8:
+ sampler_swizzles[start + i] |= 0x1000;
+ break;
+ case 16:
+ sampler_swizzles[start + i] |= 0x2000;
+ break;
+ case 32:
+ sampler_swizzles[start + i] |= 0x3000;
+ break;
+ case 10:
+ sampler_swizzles[start + i] |= 0x4000;
+ break;
+ default:
+ debug_assert(0);
+ }
+ }
}
}