summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2021-04-06 13:14:26 -0700
committer Marge Bot <eric+marge@anholt.net> 2021-04-16 08:27:35 +0000
commit 566dc4d74021e7d7a50e30a34a7061b12f65160f (patch)
tree 4323563fe32f941cdc823040b229ef2015118a93
parent a2572a9da49561af2d8dafce44bbb50c80505531 (diff)
intel/eu: Add instruction compaction support on XeHP.
This patch includes a number of reworks and fixes squashed in by Nanley Chery, Sagar Ghuge, Jordan Justen and Francisco Jerez.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
-rw-r--r-- src/intel/compiler/brw_eu_compact.c 192
1 files changed, 185 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_eu_compact.c b/src/intel/compiler/brw_eu_compact.c
index 5ed14651ad5..1841c95bfc1 100644
--- a/src/intel/compiler/brw_eu_compact.c
+++ b/src/intel/compiler/brw_eu_compact.c
@@ -821,6 +821,44 @@ static const uint16_t gfx12_src1_index_table[16] = {
0b100011010001, /* -r<4;4,1> */
};
+static const uint16_t xehp_src0_index_table[16] = { /* 16 12-bit src0 patterns; compaction_state_init() installs this as c->src0_index_table when devinfo->verx10 >= 125 and asserts the last entry is non-zero */
+ 0b000100000000, /* r<1;1,0> */
+ 0b000000000000, /* r<0;1,0> */
+ 0b000100000010, /* -r<1;1,0> */
+ 0b000100000001, /* (abs)r<1;1,0> */
+ 0b000000000010, /* -r<0;1,0> */
+ 0b001000000000, /* r<2;1,0> */
+ 0b001001000000, /* r<2;4,0> */
+ 0b001101000000, /* r<4;4,0> */
+ 0b001100000000, /* r<4;1,0> */
+ 0b000100000011, /* -(abs)r<1;1,0> */
+ 0b000000000001, /* (abs)r<0;1,0> */
+ 0b111100010000, /* r[a]<1,0> */
+ 0b010001100000, /* r<8;8,0> */
+ 0b000101000000, /* r<1;4,0> */
+ 0b010001001000, /* r<8;4,2> */
+ 0b001000000010, /* -r<2;1,0> */
+};
+
+static const uint16_t xehp_src1_index_table[16] = { /* 16 12-bit src1 patterns; compaction_state_init() installs this as c->src1_index_table when devinfo->verx10 >= 125 and asserts the last entry is non-zero */
+ 0b000001000000, /* r<1;1,0> */
+ 0b000000000000, /* r<0;1,0> */
+ 0b100001000000, /* -r<1;1,0> */
+ 0b100000000000, /* -r<0;1,0> */
+ 0b010001000000, /* (abs)r<1;1,0> */
+ 0b100011010000, /* -r<4;4,0> */
+ 0b000010000000, /* r<2;1,0> */
+ 0b000011010000, /* r<4;4,0> */
+ 0b000011000000, /* r<4;1,0> */
+ 0b110001000000, /* -(abs)r<1;1,0> */
+ 0b010000000000, /* (abs)r<0;1,0> */
+ 0b110000000000, /* -(abs)r<0;1,0> */
+ 0b000100011000, /* r<8;8,0> */
+ 0b100010000000, /* -r<2;1,0> */
+ 0b100000001001, /* -r<0;2,1> */
+ 0b100001000100, /* -r[a]<1;1,0> */
+};
+
/* This is actually the control index table for Cherryview (26 bits), but the
* only difference from Broadwell (24 bits) is that it has two extra 0-bits at
* the start.
@@ -883,6 +921,41 @@ static const uint64_t gfx12_3src_control_index_table[32] = {
0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
};
+static const uint64_t xehp_3src_control_index_table[32] = { /* 32 37-bit patterns used when devinfo->verx10 >= 125: set_3src_control_index() linearly searches for an exact match, set_uncompacted_3src_control_index() indexes by the compacted control index */
+ 0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
+ 0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
+ 0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
+ 0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
+ 0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
+ 0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
+ 0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
+ 0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
+ 0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
+ 0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
+ 0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
+ 0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
+ 0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
+ 0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
+ 0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
+ 0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
+ 0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
+ 0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */
+ 0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */
+ 0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */
+ 0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */
+ 0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */
+ 0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */
+ 0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */
+ 0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */
+ 0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */
+ 0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */
+};
+
static const uint32_t gfx12_3src_source_index_table[32] = {
0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
@@ -918,6 +991,44 @@ static const uint32_t gfx12_3src_source_index_table[32] = {
0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
};
+static const uint32_t xehp_3src_source_index_table[32] = { /* 32 21-bit patterns; set_3src_source_index() and set_uncompacted_3src_source_index() use this instead of gfx12_3src_source_index_table when devinfo->verx10 >= 125 */
+ 0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */
+ 0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */
+ 0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */
+ 0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */
+ 0b101100000000100000001, /* grf<1;0> grf<0;0> grf<1> */
+ 0b101100000001100001001, /* -grf<1;0> grf<1;0> grf<1> */
+ 0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */
+ 0b101100000001100000000, /* grf<0;0> grf<1;0> grf<1> */
+ 0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */
+ 0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
+ 0b101100010001100000001, /* grf<1;0> -grf<1;0> grf<1> */
+ 0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
+ 0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */
+ 0b100100010001100000000, /* grf<0;0> -grf<1;0> grf<0> */
+ 0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
+ 0b100100000001100001001, /* -grf<1;0> grf<1;0> grf<0> */
+ 0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */
+ 0b100100000001100001000, /* -grf<0;0> grf<1;0> grf<0> */
+ 0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>
+ * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
+ * dpas.*x1 grf:f grf:bf grf:bf
+ */
+ 0b101100010001100000000, /* grf<0;0> -grf<1;0> grf<1> */
+ 0b100100000101100000000, /* grf<0;0> grf<1;0> -grf<0> */
+ 0b101000000001100000000, /* grf<0;0> arf<1;0> grf<1> */
+ 0b100100000101100000001, /* grf<1;0> grf<1;0> -grf<0> */
+ 0b101100000101100001001, /* -grf<1;0> grf<1;0> -grf<1> */
+ 0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
+ 0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
+ 0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
+ 0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
+ 0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
+ 0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
+ 0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
+ 0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
+};
+
static const uint32_t gfx12_3src_subreg_table[32] = {
0b00000000000000000000, /* .0 .0 .0 .0 */
0b00100000000000000000, /* .0 .0 .0 .4 */
@@ -1161,7 +1272,34 @@ set_3src_control_index(const struct gen_device_info *devinfo,
{
assert(devinfo->ver >= 8);
- if (devinfo->ver >= 12) {
+ if (devinfo->verx10 >= 125) {
+ uint64_t uncompacted = /* 37b/XeHP+ */
+ (brw_inst_bits(src, 95, 92) << 33) | /* 4b */
+ (brw_inst_bits(src, 90, 88) << 30) | /* 3b */
+ (brw_inst_bits(src, 82, 80) << 27) | /* 3b */
+ (brw_inst_bits(src, 50, 50) << 26) | /* 1b */
+ (brw_inst_bits(src, 49, 48) << 24) | /* 2b */
+ (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
+ (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
+ (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
+ (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
+ (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
+ (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
+ (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
+ (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
+ (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
+ (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
+ (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
+ (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
+ (brw_inst_bits(src, 18, 16)); /* 3b */
+
+ for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
+ if (xehp_3src_control_index_table[i] == uncompacted) {
+ brw_compact_inst_set_3src_control_index(devinfo, dst, i);
+ return true;
+ }
+ }
+ } else if (devinfo->ver >= 12) {
uint64_t uncompacted = /* 36b/TGL+ */
(brw_inst_bits(src, 95, 92) << 32) | /* 4b */
(brw_inst_bits(src, 90, 88) << 29) | /* 3b */
@@ -1233,8 +1371,15 @@ set_3src_source_index(const struct gen_device_info *devinfo,
(brw_inst_bits(src, 43, 43) << 1) | /* 1b */
(brw_inst_bits(src, 35, 35)); /* 1b */
- for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_source_index_table); i++) {
- if (gfx12_3src_source_index_table[i] == uncompacted) {
+ const uint32_t *three_src_source_index_table =
+ devinfo->verx10 >= 125 ?
+ xehp_3src_source_index_table : gfx12_3src_source_index_table;
+ const uint32_t three_src_source_index_table_len =
+ devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
+ ARRAY_SIZE(gfx12_3src_source_index_table);
+
+ for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
+ if (three_src_source_index_table[i] == uncompacted) {
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
return true;
}
@@ -1885,7 +2030,30 @@ set_uncompacted_3src_control_index(const struct compaction_state *c,
const struct gen_device_info *devinfo = c->devinfo;
assert(devinfo->ver >= 8);
- if (devinfo->ver >= 12) {
+ if (devinfo->verx10 >= 125) {
+ uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
+ uint64_t uncompacted = xehp_3src_control_index_table[compacted];
+
+ brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
+ brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
+ brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
+ brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
+ brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
+ brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
+ brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
+ brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
+ brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
+ brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
+ brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
+ brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
+ brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
+ brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
+ brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
+ brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
+ brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
+ brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
+
+ } else if (devinfo->ver >= 12) {
uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
@@ -1928,7 +2096,10 @@ set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
if (devinfo->ver >= 12) {
- uint32_t uncompacted = gfx12_3src_source_index_table[compacted];
+ const uint32_t *three_src_source_index_table =
+ devinfo->verx10 >= 125 ?
+ xehp_3src_source_index_table : gfx12_3src_source_index_table;
+ uint32_t uncompacted = three_src_source_index_table[compacted];
brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
@@ -2213,6 +2384,8 @@ compaction_state_init(struct compaction_state *c,
assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
+ assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
+ assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
c->devinfo = devinfo;
switch (devinfo->ver) {
@@ -2220,8 +2393,13 @@ compaction_state_init(struct compaction_state *c,
c->control_index_table = gfx12_control_index_table;;
c->datatype_table = gfx12_datatype_table;
c->subreg_table = gfx12_subreg_table;
- c->src0_index_table = gfx12_src0_index_table;
- c->src1_index_table = gfx12_src1_index_table;
+ if (devinfo->verx10 >= 125) {
+ c->src0_index_table = xehp_src0_index_table;
+ c->src1_index_table = xehp_src1_index_table;
+ } else {
+ c->src0_index_table = gfx12_src0_index_table;
+ c->src1_index_table = gfx12_src1_index_table;
+ }
break;
case 11:
c->control_index_table = gfx8_control_index_table;