summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIago Toral Quiroga <itoral@igalia.com>2020-08-06 14:14:17 +0200
committerAlejandro Piñeiro <apinheiro@igalia.com>2020-10-15 02:04:04 +0200
commit442f48f27b666cd2183d4ce27977da045ee34b0f (patch)
tree1bec951431b2ea9a03d9cfa9b07c00da32c05698
parent3ec165bce99dfc58f5d023d9dc853c71f36a3f74 (diff)
v3d/compiler: implement load interpolated input intrinsics
We will lower GLSL interpolateAt functions to these. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Acked-by: Eric Anholt <eric@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7155>
-rw-r--r--src/broadcom/compiler/nir_to_vir.c227
1 files changed, 227 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index b42ddeadaca..3e2e079c3fb 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2200,6 +2200,145 @@ ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
}
}
}
+
+/**
+ * This implementation is based on v3d_sample_{x,y}_offset() from
+ * v3d_sample_offset.h.
+ */
+static void
+ntq_get_sample_offset(struct v3d_compile *c, struct qreg sample_idx,
+ struct qreg *sx, struct qreg *sy)
+{
+ sample_idx = vir_ITOF(c, sample_idx);
+
+ struct qreg offset_x =
+ vir_FADD(c, vir_uniform_f(c, -0.125f),
+ vir_FMUL(c, sample_idx,
+ vir_uniform_f(c, 0.5f)));
+ vir_set_pf(vir_FCMP_dest(c, vir_nop_reg(),
+ vir_uniform_f(c, 2.0f), sample_idx),
+ V3D_QPU_PF_PUSHC);
+ offset_x = vir_SEL(c, V3D_QPU_COND_IFA,
+ vir_FSUB(c, offset_x, vir_uniform_f(c, 1.25f)),
+ offset_x);
+
+ struct qreg offset_y =
+ vir_FADD(c, vir_uniform_f(c, -0.375f),
+ vir_FMUL(c, sample_idx,
+ vir_uniform_f(c, 0.25f)));
+ *sx = offset_x;
+ *sy = offset_y;
+}
+
+/**
+ * This implementation is based on get_centroid_offset() from fep.c.
+ */
+static void
+ntq_get_barycentric_centroid(struct v3d_compile *c,
+ struct qreg *out_x,
+ struct qreg *out_y)
+{
+ struct qreg sample_mask;
+ if (c->output_sample_mask_index != -1)
+ sample_mask = c->outputs[c->output_sample_mask_index];
+ else
+ sample_mask = vir_MSF(c);
+
+ struct qreg i0 = vir_uniform_ui(c, 0);
+ struct qreg i1 = vir_uniform_ui(c, 1);
+ struct qreg i2 = vir_uniform_ui(c, 2);
+ struct qreg i3 = vir_uniform_ui(c, 3);
+ struct qreg i4 = vir_uniform_ui(c, 4);
+ struct qreg i8 = vir_uniform_ui(c, 8);
+
+ /* sN = TRUE if sample N enabled in sample mask, FALSE otherwise */
+ struct qreg F = vir_uniform_ui(c, 0);
+ struct qreg T = vir_uniform_ui(c, ~0);
+ struct qreg s0 = vir_XOR(c, vir_AND(c, sample_mask, i1), i1);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
+ s0 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+ struct qreg s1 = vir_XOR(c, vir_AND(c, sample_mask, i2), i2);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
+ s1 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+ struct qreg s2 = vir_XOR(c, vir_AND(c, sample_mask, i4), i4);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
+ s2 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+ struct qreg s3 = vir_XOR(c, vir_AND(c, sample_mask, i8), i8);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s3), V3D_QPU_PF_PUSHZ);
+ s3 = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+
+ /* sample_idx = s0 ? 0 : s2 ? 2 : s1 ? 1 : 3 */
+ struct qreg sample_idx = i3;
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s1), V3D_QPU_PF_PUSHZ);
+ sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i1, sample_idx);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s2), V3D_QPU_PF_PUSHZ);
+ sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i2, sample_idx);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), s0), V3D_QPU_PF_PUSHZ);
+ sample_idx = vir_SEL(c, V3D_QPU_COND_IFNA, i0, sample_idx);
+
+ /* Get offset at selected sample index */
+ struct qreg offset_x, offset_y;
+ ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
+
+ /* Select pixel center [offset=(0,0)] if two opposing samples (or none)
+ * are selected.
+ */
+ struct qreg s0_and_s3 = vir_AND(c, s0, s3);
+ struct qreg s1_and_s2 = vir_AND(c, s1, s2);
+
+ struct qreg use_center = vir_XOR(c, sample_mask, vir_uniform_ui(c, 0));
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
+ use_center = vir_SEL(c, V3D_QPU_COND_IFA, T, F);
+ use_center = vir_OR(c, use_center, s0_and_s3);
+ use_center = vir_OR(c, use_center, s1_and_s2);
+
+ struct qreg zero = vir_uniform_f(c, 0.0f);
+ vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), use_center), V3D_QPU_PF_PUSHZ);
+ offset_x = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_x);
+ offset_y = vir_SEL(c, V3D_QPU_COND_IFNA, zero, offset_y);
+
+ *out_x = offset_x;
+ *out_y = offset_y;
+}
+
+static struct qreg
+ntq_emit_load_interpolated_input(struct v3d_compile *c,
+ struct qreg p,
+ struct qreg C,
+ struct qreg offset_x,
+ struct qreg offset_y,
+ unsigned mode)
+{
+ if (mode == INTERP_MODE_FLAT)
+ return C;
+
+ struct qreg sample_offset_x =
+ vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
+ struct qreg sample_offset_y =
+ vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
+
+ struct qreg scaleX =
+ vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_x),
+ offset_x);
+ struct qreg scaleY =
+ vir_FADD(c, vir_FSUB(c, vir_uniform_f(c, 0.5f), sample_offset_y),
+ offset_y);
+
+ struct qreg pInterp =
+ vir_FADD(c, p, vir_FADD(c, vir_FMUL(c, vir_FDX(c, p), scaleX),
+ vir_FMUL(c, vir_FDY(c, p), scaleY)));
+
+ if (mode == INTERP_MODE_NOPERSPECTIVE)
+ return vir_FADD(c, pInterp, C);
+
+ struct qreg w = c->payload_w;
+ struct qreg wInterp =
+ vir_FADD(c, w, vir_FADD(c, vir_FMUL(c, vir_FDX(c, w), scaleX),
+ vir_FMUL(c, vir_FDY(c, w), scaleY)));
+
+ return vir_FADD(c, vir_FMUL(c, pInterp, wInterp), C);
+}
+
static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
@@ -2526,6 +2665,94 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c))));
break;
+ case nir_intrinsic_load_barycentric_at_offset:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_MOV(c, ntq_get_src(c, instr->src[0], 0)));
+ ntq_store_dest(c, &instr->dest, 1,
+ vir_MOV(c, ntq_get_src(c, instr->src[0], 1)));
+ break;
+
+ case nir_intrinsic_load_barycentric_pixel:
+ ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
+ ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
+ break;
+
+ case nir_intrinsic_load_barycentric_at_sample: {
+ if (!c->fs_key->msaa) {
+ ntq_store_dest(c, &instr->dest, 0, vir_uniform_f(c, 0.0f));
+ ntq_store_dest(c, &instr->dest, 1, vir_uniform_f(c, 0.0f));
+ return;
+ }
+
+ struct qreg offset_x, offset_y;
+ struct qreg sample_idx = ntq_get_src(c, instr->src[0], 0);
+ ntq_get_sample_offset(c, sample_idx, &offset_x, &offset_y);
+
+ ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
+ ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
+ break;
+ }
+
+ case nir_intrinsic_load_barycentric_sample: {
+ struct qreg offset_x =
+ vir_FSUB(c, vir_FXCD(c), vir_ITOF(c, vir_XCD(c)));
+ struct qreg offset_y =
+ vir_FSUB(c, vir_FYCD(c), vir_ITOF(c, vir_YCD(c)));
+
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_FSUB(c, offset_x, vir_uniform_f(c, 0.5f)));
+ ntq_store_dest(c, &instr->dest, 1,
+ vir_FSUB(c, offset_y, vir_uniform_f(c, 0.5f)));
+ break;
+ }
+
+ case nir_intrinsic_load_barycentric_centroid: {
+ struct qreg offset_x, offset_y;
+ ntq_get_barycentric_centroid(c, &offset_x, &offset_y);
+ ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, offset_x));
+ ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, offset_y));
+ break;
+ }
+
+ case nir_intrinsic_load_interpolated_input: {
+ assert(nir_src_is_const(instr->src[1]));
+ const uint32_t offset = nir_src_as_uint(instr->src[1]);
+
+ for (int i = 0; i < instr->num_components; i++) {
+ const uint32_t input_idx =
+ (nir_intrinsic_base(instr) + offset) * 4 +
+ nir_intrinsic_component(instr) + i;
+
+ /* If we are not in MSAA or if we are not interpolating
+ * a user varying, just return the pre-computed
+ * interpolated input.
+ */
+ if (!c->fs_key->msaa ||
+ c->interp[input_idx].vp.file == QFILE_NULL) {
+ ntq_store_dest(c, &instr->dest, i,
+ vir_MOV(c, c->inputs[input_idx]));
+ continue;
+ }
+
+ /* Otherwise compute interpolation at the specified
+ * offset.
+ */
+ struct qreg p = c->interp[input_idx].vp;
+ struct qreg C = c->interp[input_idx].C;
+ unsigned interp_mode = c->interp[input_idx].mode;
+
+ struct qreg offset_x = ntq_get_src(c, instr->src[0], 0);
+ struct qreg offset_y = ntq_get_src(c, instr->src[0], 1);
+
+ struct qreg result =
+ ntq_emit_load_interpolated_input(c, p, C,
+ offset_x, offset_y,
+ interp_mode);
+ ntq_store_dest(c, &instr->dest, i, result);
+ }
+ break;
+ }
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);