summary | refs | log | tree | commit | diff
path: root/src/sna/brw/brw_wm.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sna/brw/brw_wm.c')
-rw-r--r-- src/sna/brw/brw_wm.c 542
1 files changed, 542 insertions, 0 deletions
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
new file mode 100644
index 00000000..9a8af5f4
--- /dev/null
+++ b/src/sna/brw/brw_wm.c
@@ -0,0 +1,542 @@
+#include "brw.h"
+
+#define X16 8 /* GRF number that brw_wm_xy() writes its X result to; read by the LINE instructions on gen < 60 */
+#define Y16 10 /* GRF number that brw_wm_xy() writes its Y result to; read by the MAC instructions on gen < 60 */
+
+/*
+ * Emit the X/Y setup consumed by the LINE/MAC interpolation paths.
+ *
+ * Two uncompressed ADDs combine sub-offsets 4 and 5 of g1 (viewed as UW)
+ * with immediate vectors into UW temporaries at g30 (x) and g28 (y); dw
+ * selects the 16-wide or 8-wide form of those temporaries.  Two compressed
+ * ADDs then add the negated g1.0 / g1.1 and store the results in GRFs X16
+ * and Y16.  Compression control is left at BRW_COMPRESSION_COMPRESSED.
+ */
+static void brw_wm_xy(struct brw_compile *p, int dw)
+{
+	struct brw_reg payload = brw_vec1_grf(1, 0);
+	struct brw_reg payload_uw = __retype_uw(payload);
+	struct brw_reg x_tmp, y_tmp;
+
+	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+	if (dw != 16) {
+		x_tmp = brw_uw8_grf(30, 0);
+		y_tmp = brw_uw8_grf(28, 0);
+	} else {
+		x_tmp = brw_uw16_grf(30, 0);
+		y_tmp = brw_uw16_grf(28, 0);
+	}
+
+	brw_ADD(p, x_tmp,
+		__stride(__suboffset(payload_uw, 4), 2, 4, 0),
+		brw_imm_v(0x10101010));
+	brw_ADD(p, y_tmp,
+		__stride(__suboffset(payload_uw, 5), 2, 4, 0),
+		brw_imm_v(0x11001100));
+
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	brw_ADD(p,
+		brw_vec8_grf(X16, 0),
+		vec8(x_tmp),
+		brw_negate(payload));
+	brw_ADD(p,
+		brw_vec8_grf(Y16, 0),
+		vec8(y_tmp),
+		brw_negate(__suboffset(payload, 1)));
+}
+
+/*
+ * Emit the affine texture-coordinate interpolation for one channel.
+ *
+ * The first coordinate is written to message register msg+1 and the second
+ * to msg+1+dw/8.  On gen >= 60 each coordinate is a single PLN against g2;
+ * on older parts it is a LINE/MAC pair against the X16/Y16 registers.
+ * dw (8 or 16) also selects the compression mode and, together with
+ * channel, the GRF holding the interpolation coefficients.
+ */
+static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
+{
+	const int first = msg + 1;
+	const int second = first + dw/8;
+	int coef;
+
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		coef = (p->gen >= 60 ? 6 : 3) + 2*channel;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		coef = (p->gen >= 60 ? 4 : 3) + channel;
+	}
+
+	if (p->gen < 60) {
+		struct brw_reg plane = brw_vec1_grf(coef, 0);
+
+		brw_LINE(p, brw_null_reg(), __suboffset(plane, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_message_reg(first), __suboffset(plane, 1), brw_vec8_grf(Y16, 0));
+
+		brw_LINE(p, brw_null_reg(), __suboffset(plane, 4), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_message_reg(second), __suboffset(plane, 5), brw_vec8_grf(Y16, 0));
+		return;
+	}
+
+	brw_PLN(p,
+		brw_message_reg(first),
+		brw_vec1_grf(coef, 0),
+		brw_vec8_grf(2, 0));
+	brw_PLN(p,
+		brw_message_reg(second),
+		brw_vec1_grf(coef, 4),
+		brw_vec8_grf(2, 0));
+}
+
+/* Map a dispatch width to the sampler SIMD mode: SIMD16 for 16, SIMD8 otherwise. */
+static inline unsigned simd(int dw)
+{
+	if (dw == 16)
+		return BRW_SAMPLER_SIMD_MODE_SIMD16;
+
+	return BRW_SAMPLER_SIMD_MODE_SIMD8;
+}
+
+/*
+ * Build the UW-typed GRF destination for a sampler message: register
+ * number `result`, sub-register 0, with vertical stride and width of 16
+ * when dw == 16 and 8 otherwise.
+ */
+static inline struct brw_reg sample_result(int dw, int result)
+{
+	const bool wide = dw == 16;
+
+	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+
+/*
+ * Emit a SAMPLE of all four components (WRITEMASK_XYZW) for `channel`
+ * into the GRFs starting at `result`.
+ *
+ * On gen >= 60 no header is sent and the message starts at msg+1; on older
+ * parts a header is sent with g0 as the source and the message register
+ * index is msg.  len is 4 for dw == 16 and 2 otherwise; the response
+ * length is 2*len and the message length is len plus the header.
+ */
+static void brw_wm_sample(struct brw_compile *p, int dw,
+			   int channel, int msg, int result)
+{
+	const int len = dw == 16 ? 4 : 2;
+	const bool header = p->gen < 60;
+	struct brw_reg src0;
+
+	if (header) {
+		src0 = brw_vec8_grf(0, 0);
+	} else {
+		msg += 1;
+		src0 = brw_message_reg(msg);
+	}
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+		   channel+1, channel, WRITEMASK_XYZW, 0,
+		   2*len, len+header, header, simd(dw));
+}
+
+/*
+ * Emit a SAMPLE that requests only the W component (WRITEMASK_W) for
+ * `channel` into the GRFs starting at `result`.
+ *
+ * A header is always sent: the source is message register msg on
+ * gen >= 60 and g0 otherwise.  With len = 4 (dw == 16) or 2, the response
+ * length is len/2 and the message length is len+1.
+ */
+static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
+				  int channel, int msg, int result)
+{
+	const int len = dw == 16 ? 4 : 2;
+	struct brw_reg src0;
+
+	if (p->gen < 60)
+		src0 = brw_vec8_grf(0, 0);
+	else
+		src0 = brw_message_reg(msg);
+
+	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+		   channel+1, channel, WRITEMASK_W, 0,
+		   len/2, len+1, true, simd(dw));
+}
+
+/*
+ * Affine fetch of one channel: emit the coordinate interpolation, then a
+ * full XYZW sample into the GRFs starting at `result`.
+ */
+static void
+brw_wm_affine(struct brw_compile *p, int dw, int channel, int msg, int result)
+{
+	brw_wm_affine_st(p, dw, channel, msg);
+	brw_wm_sample(p, dw, channel, msg, result);
+}
+
+/*
+ * Affine fetch of one channel's alpha: emit the coordinate interpolation,
+ * then a W-only sample into the GRFs starting at `result`.
+ */
+static void
+brw_wm_affine__alpha(struct brw_compile *p, int dw, int channel, int msg, int result)
+{
+	brw_wm_affine_st(p, dw, channel, msg);
+	brw_wm_sample__alpha(p, dw, channel, msg, result);
+}
+
+/*
+ * Build a UW-typed null (ARF) destination whose vertical stride and width
+ * are 16 when dw == 16 and 8 otherwise.
+ */
+static inline struct brw_reg null_result(int dw)
+{
+	const bool wide = dw == 16;
+
+	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
+		       BRW_REGISTER_TYPE_UW,
+		       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+		       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+		       BRW_HORIZONTAL_STRIDE_1,
+		       BRW_SWIZZLE_XYZW,
+		       WRITEMASK_XYZW);
+}
+
+/*
+ * Emit the render-target write SEND that ends a kernel.
+ *
+ * dw selects the SIMD16 or SIMD8 single-source message and a payload of
+ * 8 or 4 registers.  On gen < 60 g1 is first copied to m1 with masking
+ * disabled, a header is sent (two extra registers) and g0 is the source;
+ * on gen >= 60 the message is header-less and sourced from m2.
+ */
+static void brw_fb_write(struct brw_compile *p, int dw)
+{
+	const bool pre_gen6 = p->gen < 60;
+	struct brw_instruction *send;
+	unsigned control, type, length;
+	struct brw_reg payload;
+
+	if (dw != 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+		length = 4;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+		length = 8;
+	}
+
+	if (pre_gen6) {
+		brw_push_insn_state(p);
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
+		brw_pop_insn_state(p);
+
+		length += 2;
+	}
+
+	/* The execution mask is ignored for render target writes. */
+	send = brw_next_insn(p, BRW_OPCODE_SEND);
+	send->header.predicate_control = 0;
+	send->header.compression_control = BRW_COMPRESSION_NONE;
+
+	if (pre_gen6) {
+		send->header.destreg__conditionalmod = 0;
+		type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+		payload = __retype_uw(brw_vec8_grf(0, 0));
+	} else {
+		payload = brw_message_reg(2);
+		type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+	}
+
+	brw_set_dest(p, send, null_result(dw));
+	brw_set_src0(p, send, payload);
+	brw_set_dp_write_message(p, send, 0,
+				 control, type, length,
+				 pre_gen6, true, 0, true, false);
+}
+
+/*
+ * Copy the sampled colour from the GRFs starting at `src` into the message
+ * registers starting at m2, then emit the render-target write.
+ *
+ * Three layouts are produced: compressed MOVs into every second MRF on
+ * gen >= 60 (only two of them for dw == 8), MOVs using BRW_MRF_COMPR4 on
+ * gen >= 45 with dw == 16, and otherwise one uncompressed MOV per
+ * register, plus a 2NDHALF MOV into the MRF four higher when dw == 16.
+ */
+static void brw_wm_write(struct brw_compile *p, int dw, int src)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0));
+		brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
+	} else {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			const int grf = src + 2*i;
+
+			if (p->gen >= 60) {
+				brw_MOV(p,
+					brw_message_reg(2 + 2*i),
+					brw_vec8_grf(grf, 0));
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MOV(p,
+					brw_message_reg(2 + i + BRW_MRF_COMPR4),
+					brw_vec8_grf(grf, 0));
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MOV(p,
+				brw_message_reg(2 + i),
+				brw_vec8_grf(grf, 0));
+			if (dw != 16)
+				continue;
+
+			brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+			brw_MOV(p,
+				brw_message_reg(2 + i + 4),
+				brw_vec8_grf(grf + 1, 0));
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Build a float-typed GRF source for register `nr`, sub-register 0, with
+ * vertical stride 0, width 8 and horizontal stride 1.
+ */
+static inline struct brw_reg mask_a8(int nr)
+{
+	struct brw_reg reg;
+
+	reg = brw_reg(BRW_GENERAL_REGISTER_FILE, nr, 0,
+		      BRW_REGISTER_TYPE_F,
+		      BRW_VERTICAL_STRIDE_0,
+		      BRW_WIDTH_8,
+		      BRW_HORIZONTAL_STRIDE_1,
+		      BRW_SWIZZLE_XYZW,
+		      WRITEMASK_XYZW);
+	return reg;
+}
+
+/*
+ * Multiply the colour in the GRFs starting at `src` by the single mask
+ * value held at `mask`, store the products in the message registers
+ * starting at m2, then emit the render-target write.
+ *
+ * The same mask register is used for every colour register; only the
+ * 2NDHALF multiply of the uncompressed dw == 16 path reads mask+1.  The
+ * gen >= 60, dw == 8 path reads the mask through mask_a8().
+ */
+static void brw_wm_write__mask(struct brw_compile *p,
+				int dw,
+				int src, int mask)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MUL(p,
+			brw_message_reg(2),
+			brw_vec8_grf(src, 0),
+			mask_a8(mask));
+		brw_MUL(p,
+			brw_message_reg(4),
+			brw_vec8_grf(src+2, 0),
+			mask_a8(mask));
+	} else {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			const int grf = src + 2*i;
+
+			if (p->gen >= 60) {
+				brw_MUL(p,
+					brw_message_reg(2 + 2*i),
+					brw_vec8_grf(grf, 0),
+					brw_vec8_grf(mask, 0));
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MUL(p,
+					brw_message_reg(2 + i + BRW_MRF_COMPR4),
+					brw_vec8_grf(grf, 0),
+					brw_vec8_grf(mask, 0));
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MUL(p,
+				brw_message_reg(2 + i),
+				brw_vec8_grf(grf, 0),
+				brw_vec8_grf(mask, 0));
+			if (dw != 16)
+				continue;
+
+			brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+			brw_MUL(p,
+				brw_message_reg(2 + i + 4),
+				brw_vec8_grf(grf + 1, 0),
+				brw_vec8_grf(mask+1, 0));
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Component-wise variant of the masked write: each colour register
+ * starting at `src` is multiplied by the matching register starting at
+ * `mask` (same offset in both), the products go to the message registers
+ * starting at m2, and the render-target write is emitted.
+ */
+static void brw_wm_write__mask_ca(struct brw_compile *p,
+				   int dw, int src, int mask)
+{
+	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+	if (dw == 8 && p->gen >= 60) {
+		brw_MUL(p,
+			brw_message_reg(2),
+			brw_vec8_grf(src, 0),
+			brw_vec8_grf(mask, 0));
+		brw_MUL(p,
+			brw_message_reg(4),
+			brw_vec8_grf(src + 2, 0),
+			brw_vec8_grf(mask + 2, 0));
+	} else {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			const int off = 2*i;
+
+			if (p->gen >= 60) {
+				brw_MUL(p,
+					brw_message_reg(2 + off),
+					brw_vec8_grf(src + off, 0),
+					brw_vec8_grf(mask + off, 0));
+				continue;
+			}
+
+			if (p->gen >= 45 && dw == 16) {
+				brw_MUL(p,
+					brw_message_reg(2 + i + BRW_MRF_COMPR4),
+					brw_vec8_grf(src + off, 0),
+					brw_vec8_grf(mask + off, 0));
+				continue;
+			}
+
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_MUL(p,
+				brw_message_reg(2 + i),
+				brw_vec8_grf(src + off, 0),
+				brw_vec8_grf(mask + off, 0));
+			if (dw != 16)
+				continue;
+
+			brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+			brw_MUL(p,
+				brw_message_reg(2 + i + 4),
+				brw_vec8_grf(src + off + 1, 0),
+				brw_vec8_grf(mask + off + 1, 0));
+		}
+	}
+
+	brw_fb_write(p, dw);
+}
+
+/*
+ * Assemble the kernel for an affine source without a mask: X/Y setup on
+ * gen < 60, sample channel 0 into g12 onwards, write it out.
+ */
+void
+brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_affine(p, dispatch, 0, 1, color);
+	brw_wm_write(p, dispatch, color);
+}
+
+/*
+ * Assemble the kernel for an affine source with an affine mask: X/Y setup
+ * on gen < 60, sample channel 0 fully into g12 onwards, sample channel 1's
+ * alpha into g20 onwards, then write source * mask.
+ */
+void
+brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+	const int alpha = 20;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_affine(p, dispatch, 0, 1, color);
+	brw_wm_affine__alpha(p, dispatch, 1, 7, alpha);
+	brw_wm_write__mask(p, dispatch, color, alpha);
+}
+
+/*
+ * Assemble the component-alpha kernel for affine source and mask: X/Y
+ * setup on gen < 60, sample channel 0 into g12 onwards and channel 1 into
+ * g20 onwards (both fully), then write the per-component product.
+ */
+void
+brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+	const int mask_color = 20;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_affine(p, dispatch, 0, 1, color);
+	brw_wm_affine(p, dispatch, 1, 7, mask_color);
+	brw_wm_write__mask_ca(p, dispatch, color, mask_color);
+}
+
+/*
+ * Assemble the "sa" kernel for affine source and mask: X/Y setup on
+ * gen < 60, sample only channel 0's alpha into g12 onwards, sample
+ * channel 1 fully into g14 onwards, then write channel 1's colour
+ * multiplied by channel 0's alpha.
+ */
+void
+brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
+{
+	const int alpha = 12;
+	const int color = 14;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_affine__alpha(p, dispatch, 0, 1, alpha);
+	brw_wm_affine(p, dispatch, 1, 7, color);
+	brw_wm_write__mask(p, dispatch, color, alpha);
+}
+
+/* Projective variants */
+
+/*
+ * Emit the projective texture-coordinate computation for one channel.
+ *
+ * Both coordinates are interpolated, multiplied by the reciprocal of the
+ * interpolated third plane (read from GRF uv+1) and written to message
+ * registers msg+1 and msg+1+dw/8.
+ *
+ *   p       - compile context; p->gen selects PLN (gen >= 60) or LINE/MAC
+ *   dw      - dispatch width, 8 or 16; also selects the compression mode
+ *   channel - texture channel, used to locate the coefficient GRF
+ *   msg     - base message register; the coordinates start at msg+1
+ *
+ * The GRFs starting at 28 and 30 are used as scratch on both paths.
+ */
+static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg)
+{
+	int uv;
+
+	/* NOTE(review): in the dw == 8 case the coefficient register of
+	 * channel 1 (uv + 1) is the same GRF that channel 0 reads its 1/z
+	 * plane from - confirm against the coefficient layout set up by the
+	 * caller.
+	 */
+	if (dw == 16) {
+		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		uv = p->gen >= 60 ? 6 : 3;
+		uv += 2*channel;
+	} else {
+		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+		uv = p->gen >= 60 ? 4 : 3;
+		uv += channel;
+	}
+
+	msg++;
+	if (p->gen >= 60) {
+		/* First compute 1/z.  The plane must land in g30 (as on the
+		 * LINE/MAC path) because the reciprocal and both MULs below
+		 * read g30, not the message register.
+		 */
+		brw_PLN(p,
+			brw_vec8_grf(30, 0),
+			brw_vec1_grf(uv+1, 0),
+			brw_vec8_grf(2, 0));
+
+		if (dw == 16) {
+			/* The reciprocal is issued per 8-wide half. */
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		} else
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+
+		/* Now compute the output s,t values */
+		brw_PLN(p,
+			brw_vec8_grf(28, 0),
+			brw_vec1_grf(uv, 0),
+			brw_vec8_grf(2, 0));
+		brw_MUL(p,
+			brw_message_reg(msg),
+			brw_vec8_grf(28, 0),
+			brw_vec8_grf(30, 0));
+		msg += dw/8;
+
+		/* The second coordinate uses the plane at sub-register 4 of
+		 * the same coefficient GRF and the same g2 source, mirroring
+		 * brw_wm_affine_st() and the LINE/MAC path below.
+		 */
+		brw_PLN(p,
+			brw_vec8_grf(28, 0),
+			brw_vec1_grf(uv, 4),
+			brw_vec8_grf(2, 0));
+		brw_MUL(p,
+			brw_message_reg(msg),
+			brw_vec8_grf(28, 0),
+			brw_vec8_grf(30, 0));
+	} else {
+		struct brw_reg r = brw_vec1_grf(uv, 0);
+
+		/* First compute 1/z */
+		brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
+
+		if (dw == 16) {
+			/* The reciprocal is issued per 8-wide half. */
+			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+		} else
+			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+
+		/* Now compute the output s,t values */
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
+		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+		msg += dw/8;
+
+		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
+		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
+		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+	}
+}
+
+/*
+ * Projective fetch of one channel: emit the coordinate computation, then
+ * a full XYZW sample into the GRFs starting at `result`.
+ */
+static void
+brw_wm_projective(struct brw_compile *p, int dw, int channel, int msg, int result)
+{
+	brw_wm_projective_st(p, dw, channel, msg);
+	brw_wm_sample(p, dw, channel, msg, result);
+}
+
+/*
+ * Projective fetch of one channel's alpha: emit the coordinate
+ * computation, then a W-only sample into the GRFs starting at `result`.
+ */
+static void
+brw_wm_projective__alpha(struct brw_compile *p, int dw, int channel, int msg, int result)
+{
+	brw_wm_projective_st(p, dw, channel, msg);
+	brw_wm_sample__alpha(p, dw, channel, msg, result);
+}
+
+/*
+ * Assemble the kernel for a projective source without a mask: X/Y setup
+ * on gen < 60, sample channel 0 into g12 onwards, write it out.
+ */
+void
+brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_projective(p, dispatch, 0, 1, color);
+	brw_wm_write(p, dispatch, color);
+}
+
+/*
+ * Assemble the kernel for a projective source with a projective mask:
+ * X/Y setup on gen < 60, sample channel 0 fully into g12 onwards, sample
+ * channel 1's alpha into g20 onwards, then write source * mask.
+ */
+void
+brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+	const int alpha = 20;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_projective(p, dispatch, 0, 1, color);
+	brw_wm_projective__alpha(p, dispatch, 1, 7, alpha);
+	brw_wm_write__mask(p, dispatch, color, alpha);
+}
+
+/*
+ * Assemble the component-alpha kernel for projective source and mask:
+ * X/Y setup on gen < 60, sample channel 0 into g12 onwards and channel 1
+ * into g20 onwards (both fully), then write the per-component product.
+ */
+void
+brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
+{
+	const int color = 12;
+	const int mask_color = 20;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_projective(p, dispatch, 0, 1, color);
+	brw_wm_projective(p, dispatch, 1, 7, mask_color);
+	brw_wm_write__mask_ca(p, dispatch, color, mask_color);
+}
+
+/*
+ * Assemble the "sa" kernel for projective source and mask: X/Y setup on
+ * gen < 60, sample only channel 0's alpha into g12 onwards, sample
+ * channel 1 fully into g14 onwards, then write channel 1's colour
+ * multiplied by channel 0's alpha.
+ */
+void
+brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
+{
+	const int alpha = 12;
+	const int color = 14;
+
+	if (p->gen < 60)
+		brw_wm_xy(p, dispatch);
+
+	brw_wm_projective__alpha(p, dispatch, 0, 1, alpha);
+	brw_wm_projective(p, dispatch, 1, 7, color);
+	brw_wm_write__mask(p, dispatch, color, alpha);
+}