diff options
author | Iago Toral Quiroga <itoral@igalia.com> | 2020-08-06 14:14:17 +0200 |
---|---|---|
committer | Alejandro Piñeiro <apinheiro@igalia.com> | 2020-10-15 02:04:04 +0200 |
commit | 442f48f27b666cd2183d4ce27977da045ee34b0f (patch) | |
tree | 1bec951431b2ea9a03d9cfa9b07c00da32c05698 | |
parent | 3ec165bce99dfc58f5d023d9dc853c71f36a3f74 (diff) |
v3d/compiler: implement load interpolated input intrinsics
We will lower GLSL interpolateAt functions to these.
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Acked-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7155>
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index b42ddeadaca..3e2e079c3fb 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2200,6 +2200,145 @@ ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 }
         }
 }
+
+/**
+ * This implementation is based on v3d_sample_{x,y}_offset() from
+ * v3d_sample_offset.h.
+ */
+static void
+ntq_get_sample_offset(struct v3d_compile *c, struct qreg sample_idx,
+                      struct qreg *sx, struct qreg *sy)
+{
+        sample_idx = vir_ITOF(c, sample_idx);
+
+        struct qreg offset_x =
+                vir_FADD(c, vir_uniform_f(c, -0.125f),
+                            vir_FMUL(c, sample_idx,
+                                        vir_uniform_f(c, 0.5f)));
+        vir_set_pf(vir_FCMP_dest(c, vir_nop_reg(),
+                                 vir_uniform_f(c, 2.0f), sample_idx),
+                   V3D_QPU_PF_PUSHC);
+        offset_x = vir_SEL(c, V3D_QPU_COND_IFA,
+                              vir_FSUB(c, offset_x, vir_uniform_f(c, 1.25f)),
+                              offset_x);
+
+        struct qreg offset_y =
+                   vir_FADD(c, vir_uniform_f(c, -0.375f),
+                               vir_FMUL(c, sample_idx,
+                                           vir_uniform_f(c, 0.25f)));
+        *sx = offset_x;
+        *sy = offset_y;
+}
+
+/**
+ * This implementation is based on get_centroid_offset() from fep.c.
+ */
+static void
+ntq_get_barycentric_centroid(struct v3d_compile *c,
+                             struct qreg *out_x,
+                             struct qreg *out_y)
+{
+        struct qreg sample_mask;
+        if (c->output_sample_mask_index != -1)
+                sample_mask = c->outputs[c->output_sample_mask_index];
+        else
+                sample_mask = vir_MSF(c);
+
+        struct qreg i0 = vir_uniform_ui(c, 0);
+        struct qreg i1 = vir_uniform_ui(c, 1);
+        struct qreg i2 = vir_uniform_ui(c, 2);
+        struct qreg i3 = vir_uniform_ui(c, 3);
+        struct qreg i4 = vir_uniform_ui(c, 4);
+        struct qreg i8 = vir_uniform_ui(c, 8);
+
+        /* sN = TRUE if sample N enabled in sample mask, FALSE otherwise */
+        struct qreg F = vir_uniform_ui(c, 0);
+        struct qreg T = vir_uniform_ui(c, ~0);
+        struct qreg s0 = vir_XOR(c, vir_AND(c, sample_mask, i1), i1);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
+        s0 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+        struct qreg s1 = vir_XOR(c, vir_AND(c, sample_mask, i2), i2);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
+        s1 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+        struct qreg s2 = vir_XOR(c, vir_AND(c, sample_mask, i4), i4);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
+        s2 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+        struct qreg s3 = vir_XOR(c, vir_AND(c, sample_mask, i8), i8);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s3), V3D_QPU_PF_PUSHZ);
+        s3 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+
+        /* sample_idx = s0 ? 0 : s2 ? 2 : s1 ? 1 : 3 */
+        struct qreg sample_idx = i3;
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
+        sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i1, sample_idx);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
+        sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i2, sample_idx);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
+        sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i0, sample_idx);
+
+        /* Get offset at selected sample index */
+        struct qreg offset_x, offset_y;
+        ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
+
+        /* Select pixel center [offset=(0,0)] if two opposing samples (or none)
+         * are selected.
+         */
+        struct qreg s0_and_s3 = vir_AND(c, s0, s3);
+        struct qreg s1_and_s2 = vir_AND(c, s1, s2);
+
+        struct qreg use_center = vir_XOR(c, sample_mask, vir_uniform_ui(c, 0));
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
+        use_center = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+        use_center = vir_OR(c, use_center, s0_and_s3);
+        use_center = vir_OR(c, use_center, s1_and_s2);
+
+        struct qreg zero = vir_uniform_f(c, 0.0f);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
+        offset_x = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_x);
+        offset_y = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_y);
+
+        *out_x = offset_x;
+        *out_y = offset_y;
+}
+
+static struct qreg
+ntq_emit_load_interpolated_input(struct v3d_compile *c,
+                                 struct qreg p,
+                                 struct qreg C,
+                                 struct qreg offset_x,
+                                 struct qreg offset_y,
+                                 unsigned mode)
+{
+        if (mode == INTERP_MODE_FLAT)
+                return C;
+
+        struct qreg sample_offset_x =
+                vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
+        struct qreg sample_offset_y =
+                vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
+
+        struct qreg scaleX =
+                vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_x),
+                            offset_x);
+        struct qreg scaleY =
+                vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_y),
+                            offset_y);
+
+        struct qreg pInterp =
+                vir_FADD(c, p, vir_FADD(c, vir_FMUL(c, vir_FDX(c, p), scaleX),
+                                           vir_FMUL(c, vir_FDY(c, p), scaleY)));
+
+        if (mode == INTERP_MODE_NOPERSPECTIVE)
+                return vir_FADD(c, pInterp, C);
+
+        struct qreg w = c->payload_w;
+        struct qreg wInterp =
+                vir_FADD(c, w, vir_FADD(c, vir_FMUL(c, vir_FDX(c, w), scaleX),
+                                           vir_FMUL(c, vir_FDY(c, w), scaleY)));
+
+        return vir_FADD(c, vir_FMUL(c, pInterp, wInterp), C);
+}
+
 static void
 ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 {
@@ -2526,6 +2665,94 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                                vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c))));
                 break;
 
+        case nir_intrinsic_load_barycentric_at_offset:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_MOV(c, ntq_get_src(c, instr->src[0], 0)));
+                ntq_store_dest(c, &instr->dest, 1,
+                               vir_MOV(c, ntq_get_src(c, instr->src[0], 1)));
+                break;
+
+        case nir_intrinsic_load_barycentric_pixel:
+                ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
+                ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
+                break;
+
+        case nir_intrinsic_load_barycentric_at_sample: {
+                if (!c->fs_key->msaa) {
+                        ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
+                        ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
+                        return;
+                }
+
+                struct qreg offset_x, offset_y;
+                struct qreg sample_idx = ntq_get_src(c, instr->src[0], 0);
+                ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
+
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
+                ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
+                break;
+        }
+
+        case nir_intrinsic_load_barycentric_sample: {
+                struct qreg offset_x =
+                        vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
+                struct qreg offset_y =
+                        vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
+
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_FSUB(c, offset_x, vir_uniform_f(c, 0.5f)));
+                ntq_store_dest(c, &instr->dest, 1,
+                               vir_FSUB(c, offset_y, vir_uniform_f(c, 0.5f)));
+                break;
+        }
+
+        case nir_intrinsic_load_barycentric_centroid: {
+                struct qreg offset_x, offset_y;
+                ntq_get_barycentric_centroid(c, &offset_x, &offset_y);
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
+                ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
+                break;
+        }
+
+        case nir_intrinsic_load_interpolated_input: {
+                assert(nir_src_is_const(instr->src[1]));
+                const uint32_t offset = nir_src_as_uint(instr->src[1]);
+
+                for (int i = 0; i < instr->num_components; i++) {
+                        const uint32_t input_idx =
+                                (nir_intrinsic_base(instr) + offset) * 4 +
+                                nir_intrinsic_component(instr) + i;
+
+                        /* If we are not in MSAA or if we are not interpolating
+                         * a user varying, just return the pre-computed
+                         * interpolated input.
+                         */
+                        if (!c->fs_key->msaa ||
+                            c->interp[input_idx].vp.file == QFILE_NULL) {
+                                ntq_store_dest(c, &instr->dest, i,
+                                               vir_MOV(c, c->inputs[input_idx]));
+                                continue;
+                        }
+
+                        /* Otherwise compute interpolation at the specified
+                         * offset.
+                         */
+                        struct qreg p = c->interp[input_idx].vp;
+                        struct qreg C = c->interp[input_idx].C;
+                        unsigned interp_mode = c->interp[input_idx].mode;
+
+                        struct qreg offset_x = ntq_get_src(c, instr->src[0], 0);
+                        struct qreg offset_y = ntq_get_src(c, instr->src[0], 1);
+
+                        struct qreg result =
+                                ntq_emit_load_interpolated_input(c, p, C,
+                                                                 offset_x, offset_y,
+                                                                 interp_mode);
+                        ntq_store_dest(c, &instr->dest, i, result);
+                }
+                break;
+        }
+
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);