summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jason Ekstrand <jason.ekstrand@intel.com> 2017-10-17 18:59:26 -0700
committer: Emil Velikov <emil.l.velikov@gmail.com> 2018-02-09 03:50:10 +0000
commit: 0bc9182f89f166f141e0f847d42c9fc557cc3aea (patch)
tree: fc59a857b1d7b506fba3e58f17bcc48837b9bd0e
parent: a094314340387ef2463ed8b4ddc9317bc539832b (diff)
intel/fs: Use the original destination region for int MUL lowering
Some hardware (CHV, BXT) has special restrictions on register regions when doing integer multiplication. We want to respect those when we lower to DxW multiplication.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 18fde36ced4279f2577097a1a7d31b55f2f5f141)

Squashed with:

i965/fs: Reset the register file to VGRF in lower_integer_multiplication

18fde36ced4279f2577097a1a7d31b55f2f5f141 changed the way temporary registers were allocated in lower_integer_multiplication so that we allocate regs_written(inst) space and keep the stride of the original destination register. This was to ensure that any MUL which originally followed the CHV/BXT integer multiply regioning restrictions would continue to follow those restrictions even after lowering. This works fine except that I forgot to reset the register file to VGRF so, even though they were assigned a number from alloc.allocate(), they had the wrong register file. This caused some GLES 3.0 CTS tests to start failing on Sandy Bridge due to attempted reads from the MRF:

ES3-CTS.functional.shaders.precision.int.highp_mul_fragment.snbm64
ES3-CTS.functional.shaders.precision.int.mediump_mul_fragment.snbm64
ES3-CTS.functional.shaders.precision.int.lowp_mul_fragment.snbm64
ES3-CTS.functional.shaders.precision.uint.highp_mul_fragment.snbm64
ES3-CTS.functional.shaders.precision.uint.mediump_mul_fragment.snbm64
ES3-CTS.functional.shaders.precision.uint.lowp_mul_fragment.snbm64

This commit remedies this problem by, instead of copying inst->dst and overwriting nr, just making a new register and setting the region to match inst->dst.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103626
Fixes: 18fde36ced4279f2577097a1a7d31b55f2f5f141
Cc: "17.3" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
(cherry picked from commit db682b8f0eafd3b9d58e736e9e2f520943a89942)
-rw-r--r-- src/intel/compiler/brw_fs.cpp 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index abb2592fdb8..8c61fc20ceb 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3485,18 +3485,25 @@ fs_visitor::lower_integer_multiplication()
bool needs_mov = false;
fs_reg orig_dst = inst->dst;
+ fs_reg low = inst->dst;
if (orig_dst.is_null() || orig_dst.file == MRF ||
regions_overlap(inst->dst, inst->size_written,
inst->src[0], inst->size_read(0)) ||
regions_overlap(inst->dst, inst->size_written,
inst->src[1], inst->size_read(1))) {
needs_mov = true;
- inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
- inst->dst.type);
+ /* Get a new VGRF but keep the same stride as inst->dst */
+ low = fs_reg(VGRF, alloc.allocate(regs_written(inst)),
+ inst->dst.type);
+ low.stride = inst->dst.stride;
+ low.offset = inst->dst.offset % REG_SIZE;
}
- fs_reg low = inst->dst;
- fs_reg high(VGRF, alloc.allocate(dispatch_width / 8),
+
+ /* Get a new VGRF but keep the same stride as inst->dst */
+ fs_reg high(VGRF, alloc.allocate(regs_written(inst)),
inst->dst.type);
+ high.stride = inst->dst.stride;
+ high.offset = inst->dst.offset % REG_SIZE;
if (devinfo->gen >= 7) {
if (inst->src[1].file == IMM) {
@@ -3517,13 +3524,13 @@ fs_visitor::lower_integer_multiplication()
inst->src[1]);
}
- ibld.ADD(subscript(inst->dst, BRW_REGISTER_TYPE_UW, 1),
+ ibld.ADD(subscript(low, BRW_REGISTER_TYPE_UW, 1),
subscript(low, BRW_REGISTER_TYPE_UW, 1),
subscript(high, BRW_REGISTER_TYPE_UW, 0));
if (needs_mov || inst->conditional_mod) {
set_condmod(inst->conditional_mod,
- ibld.MOV(orig_dst, inst->dst));
+ ibld.MOV(orig_dst, low));
}
}