summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2021-04-06 13:03:45 -0700
committer: Marge Bot <eric+marge@anholt.net> 2021-04-16 08:27:34 +0000
commit: 12479abded50e7caf5544776f7b7ecf161c6c487 (patch)
tree: f5020006e031cd3b9e9ca970aaaec15ff31ce521
parent: d53fc2240b6b012951692e172682d6799d78b802 (diff)
intel/fs: Implement representation of SWSB cross-pipeline synchronization annotations.
The execution units of XeHP platforms have multiple asynchronous ALU pipelines instead of (as far as software is concerned) the single in-order pipeline that handled most ALU instructions except for extended math in the original Xe. It's now the compiler's responsibility to identify cross-pipeline dependencies and insert synchronization annotations whenever necessary, which are encoded as some additional bits of the SWSB instruction field.

This commit represents the cross-pipeline synchronization annotations as part of the existing tgl_swsb structure used for codegen. The existing tgl_swsb_*() helpers used by hand-crafted assembly are extended to default to TGL_PIPE_ALL big-hammer synchronization in order to ensure backwards compatibility with the existing assembly. The following commits will extend the software scoreboard lowering pass in order to keep track of cross-pipeline dependencies across IR instructions, and insert more specific pipeline annotations in the SWSB field.

The disassembler is also extended here to print out any existing pipeline sync annotations.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
-rw-r--r-- src/intel/compiler/brw_disasm.c 12
-rw-r--r-- src/intel/compiler/brw_eu_defines.h 52
-rw-r--r-- src/intel/compiler/brw_eu_emit.c 2
3 files changed, 55 insertions, 11 deletions
diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c
index 330af60b2bd..99c69b2b196 100644
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -1642,10 +1642,16 @@ qtr_ctrl(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst
static int
swsb(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst)
{
- const struct tgl_swsb swsb = tgl_swsb_decode(brw_inst_opcode(devinfo, inst),
- brw_inst_swsb(devinfo, inst));
+ const enum opcode opcode = brw_inst_opcode(devinfo, inst);
+ const uint8_t x = brw_inst_swsb(devinfo, inst);
+ const struct tgl_swsb swsb = tgl_swsb_decode(devinfo, opcode, x);
if (swsb.regdist)
- format(file, " @%d", swsb.regdist);
+ format(file, " %s@%d",
+ (swsb.pipe == TGL_PIPE_FLOAT ? "F" :
+ swsb.pipe == TGL_PIPE_INT ? "I" :
+ swsb.pipe == TGL_PIPE_LONG ? "L" :
+ swsb.pipe == TGL_PIPE_ALL ? "A" : "" ),
+ swsb.regdist);
if (swsb.mode)
format(file, " $%d%s", swsb.sbid,
(swsb.mode & TGL_SBID_SET ? "" :
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index e9b3827599e..21c719fa136 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -35,6 +35,7 @@
#include <stdint.h>
#include <stdlib.h>
#include "util/macros.h"
+#include "dev/gen_device_info.h"
/* The following hunk, up-to "Execution Unit" is used by both the
* intel/compiler and i965 codebase. */
@@ -1098,11 +1099,33 @@ operator|=(tgl_sbid_mode &x, tgl_sbid_mode y)
#endif
/**
+ * TGL+ SWSB RegDist synchronization pipeline.
+ *
+ * On TGL all instructions that use the RegDist synchronization mechanism are
+ * considered to be executed as a single in-order pipeline, therefore only the
+ * TGL_PIPE_FLOAT pipeline is applicable. On XeHP+ platforms there are two
+ * additional asynchronous ALU pipelines (which still execute instructions
+ * in-order and use the RegDist synchronization mechanism). TGL_PIPE_NONE
+ * doesn't provide any RegDist pipeline synchronization information and allows
+ * the hardware to infer the pipeline based on the source types of the
+ * instruction. TGL_PIPE_ALL can be used when synchronization with all ALU
+ * pipelines is intended.
+ */
+enum tgl_pipe {
+ TGL_PIPE_NONE = 0,
+ TGL_PIPE_FLOAT,
+ TGL_PIPE_INT,
+ TGL_PIPE_LONG,
+ TGL_PIPE_ALL
+};
+
+/**
* Logical representation of the SWSB scheduling information of a hardware
* instruction. The binary representation is slightly more compact.
*/
struct tgl_swsb {
unsigned regdist : 3;
+ enum tgl_pipe pipe : 3;
unsigned sbid : 4;
enum tgl_sbid_mode mode : 3;
};
@@ -1115,7 +1138,7 @@ struct tgl_swsb {
static inline struct tgl_swsb
tgl_swsb_regdist(unsigned d)
{
- const struct tgl_swsb swsb = { d };
+ const struct tgl_swsb swsb = { d, d ? TGL_PIPE_ALL : TGL_PIPE_NONE };
assert(swsb.regdist == d);
return swsb;
}
@@ -1127,7 +1150,7 @@ tgl_swsb_regdist(unsigned d)
static inline struct tgl_swsb
tgl_swsb_sbid(enum tgl_sbid_mode mode, unsigned sbid)
{
- const struct tgl_swsb swsb = { 0, sbid, mode };
+ const struct tgl_swsb swsb = { 0, TGL_PIPE_NONE, sbid, mode };
assert(swsb.sbid == sbid);
return swsb;
}
@@ -1151,6 +1174,7 @@ tgl_swsb_dst_dep(struct tgl_swsb swsb, unsigned regdist)
{
swsb.regdist = regdist;
swsb.mode = swsb.mode & TGL_SBID_SET;
+ swsb.pipe = (regdist ? TGL_PIPE_ALL : TGL_PIPE_NONE);
return swsb;
}
@@ -1170,10 +1194,15 @@ tgl_swsb_src_dep(struct tgl_swsb swsb)
* SWSB annotation.
*/
static inline uint8_t
-tgl_swsb_encode(struct tgl_swsb swsb)
+tgl_swsb_encode(const struct gen_device_info *devinfo, struct tgl_swsb swsb)
{
if (!swsb.mode) {
- return swsb.regdist;
+ const unsigned pipe = devinfo->verx10 < 125 ? 0 :
+ swsb.pipe == TGL_PIPE_FLOAT ? 0x10 :
+ swsb.pipe == TGL_PIPE_INT ? 0x18 :
+ swsb.pipe == TGL_PIPE_LONG ? 0x50 :
+ swsb.pipe == TGL_PIPE_ALL ? 0x8 : 0;
+ return pipe | swsb.regdist;
} else if (swsb.regdist) {
return 0x80 | swsb.regdist << 4 | swsb.sbid;
} else {
@@ -1187,10 +1216,12 @@ tgl_swsb_encode(struct tgl_swsb swsb)
* tgl_swsb.
*/
static inline struct tgl_swsb
-tgl_swsb_decode(enum opcode opcode, uint8_t x)
+tgl_swsb_decode(const struct gen_device_info *devinfo, const enum opcode opcode,
+ const uint8_t x)
{
if (x & 0x80) {
- const struct tgl_swsb swsb = { (x & 0x70u) >> 4, x & 0xfu,
+ const struct tgl_swsb swsb = { (x & 0x70u) >> 4, TGL_PIPE_NONE,
+ x & 0xfu,
(opcode == BRW_OPCODE_SEND ||
opcode == BRW_OPCODE_SENDC ||
opcode == BRW_OPCODE_MATH) ?
@@ -1203,7 +1234,14 @@ tgl_swsb_decode(enum opcode opcode, uint8_t x)
} else if ((x & 0x70) == 0x40) {
return tgl_swsb_sbid(TGL_SBID_SET, x & 0xfu);
} else {
- return tgl_swsb_regdist(x & 0x7u);
+ const struct tgl_swsb swsb = { x & 0x7u,
+ ((x & 0x78) == 0x10 ? TGL_PIPE_FLOAT :
+ (x & 0x78) == 0x18 ? TGL_PIPE_INT :
+ (x & 0x78) == 0x50 ? TGL_PIPE_LONG :
+ (x & 0x78) == 0x8 ? TGL_PIPE_ALL :
+ TGL_PIPE_NONE) };
+ assert(devinfo->verx10 >= 125 || swsb.pipe == TGL_PIPE_NONE);
+ return swsb;
}
}
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 211fa4c9633..2f53609ed9e 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -620,7 +620,7 @@ brw_inst_set_state(const struct gen_device_info *devinfo,
brw_inst_set_access_mode(devinfo, insn, state->access_mode);
brw_inst_set_mask_control(devinfo, insn, state->mask_control);
if (devinfo->ver >= 12)
- brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb));
+ brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
brw_inst_set_saturate(devinfo, insn, state->saturate);
brw_inst_set_pred_control(devinfo, insn, state->predicate);
brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);