summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2014-08-18 14:27:55 -0700
committerJason Ekstrand <jason.ekstrand@intel.com>2014-09-30 10:29:14 -0700
commit7210583eb84a5d49803dbe37b0960373b4224d10 (patch)
treed355daca1d46348861122d6daf505eb6a75fd1ec /src/mesa/drivers/dri/i965/brw_fs_cse.cpp
parent4232a776a699d80601496802ab2d817374a31f56 (diff)
i965/fs_reg: Allocate double the number of vgrfs in SIMD16 mode
This is actually the squash of a bunch of different changes. Individual commit titles follow: i965/fs: Always 2-align registers SIMD16 for gen <= 5 i965/fs: Use the register width when applying offsets This reworks both byte_offset() and offset() to be more intelligent. The byte_offset() function now supports offsets bigger than 32. The offset() function uses the byte_offset() function together with the register width and the type size to offset the register by the correct amount. i965/fs: Change regs_read to be in hardware registers i965/fs: Change regs_written to be actual hardware registers i965/fs: Properly handle register widths in LOAD_PAYLOAD The LOAD_PAYLOAD instruction is a bit special because it collects a bunch of registers (with possibly different widths) into a single payload block. Once the payload is constructed, it's treated as a single block of data and most of the information such as register widths doesn't matter anymore. In particular, the offset of any particular source register is the accumulation of the sizes of the previous source registers. i965/fs: Properly set writemasks in LOAD_PAYLOAD i965/fs: Handle register widths in demote_pull_constants i965/fs: Get rid of implicit register doubling in the allocator i965/fs: Reserve enough registers for PLN instructions i965/fs: Make sources and destinations interfere in 16-wide i965/fs: Properly handle register widths in CSE i965/fs: Properly handle register widths in register_coalesce i965/fs: Properly handle widths in copy propagation i965/fs: Properly handle register widths in VARYING_PULL_CONSTANT_LOAD i965/fs: Properly handle register widths and odd register sizes in spilling i965/fs: Don't waste a register on texture lookups for gen >= 7 Previously, we were waisting a register in SIMD16 mode because we could only allocate registers in pairs. Now that we can allocate and address odd-sized registers, let's get rid of this special-case. Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_cse.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp22
1 files changed, 13 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 7edbe1915c..817fc1f1a1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -202,19 +202,21 @@ fs_visitor::opt_cse_local(bblock_t *block)
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp && !entry->generator->dst.is_null()) {
int written = entry->generator->regs_written;
+ int dst_width = entry->generator->dst.width / 8;
+ assert(written % dst_width == 0);
fs_reg orig_dst = entry->generator->dst;
fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
- orig_dst.type);
+ orig_dst.type, orig_dst.width);
entry->tmp = tmp;
entry->generator->dst = tmp;
fs_inst *copy;
- if (written > 1) {
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written);
- for (int i = 0; i < written; i++)
+ if (written > dst_width) {
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
+ for (int i = 0; i < written / dst_width; i++)
sources[i] = offset(tmp, i);
- copy = LOAD_PAYLOAD(orig_dst, sources, written);
+ copy = LOAD_PAYLOAD(orig_dst, sources, written / dst_width);
} else {
copy = MOV(orig_dst, tmp);
copy->force_writemask_all =
@@ -226,16 +228,18 @@ fs_visitor::opt_cse_local(bblock_t *block)
/* dest <- temp */
if (!inst->dst.is_null()) {
int written = inst->regs_written;
+ int dst_width = inst->dst.width / 8;
assert(written == entry->generator->regs_written);
+ assert(dst_width == entry->generator->dst.width / 8);
assert(inst->dst.type == entry->tmp.type);
fs_reg dst = inst->dst;
fs_reg tmp = entry->tmp;
fs_inst *copy;
- if (written > 1) {
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written);
- for (int i = 0; i < written; i++)
+ if (written > dst_width) {
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, written / dst_width);
+ for (int i = 0; i < written / dst_width; i++)
sources[i] = offset(tmp, i);
- copy = LOAD_PAYLOAD(dst, sources, written);
+ copy = LOAD_PAYLOAD(dst, sources, written / dst_width);
} else {
copy = MOV(dst, tmp);
copy->force_writemask_all = inst->force_writemask_all;