From fadf34773527779eef4622b2586d87ec00476c0f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 13 Jul 2015 15:52:28 +0300 Subject: i965: Fix stride field for the result of emit_uniformize(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is essentially the same problem fixed in an earlier patch for immediates. Setting the stride to zero will be particularly useful for my future SIMD lowering pass, because we will be able to just check whether the stride of a source register is zero and skip emitting the copies required to unzip it in that case. Instead of setting stride to zero in every caller of emit_uniformize() I've changed the function to return the result as its return value (previously it was being written into a caller-provided destination register), because this way we can enforce that the result is used with the correct regioning from the function itself. The changes to the prototype of its VEC4 counterpart are mainly for the sake of symmetry, VEC4 registers don't have stride. Reviewed-by: Samuel Iglesias Gonsálvez --- src/mesa/drivers/dri/i965/brw_fs_builder.h | 16 +++++++++------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 7 +++++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 ++++++++++-------- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 34646d7fd74..c4ee9d484b7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -350,17 +350,19 @@ namespace brw { } /** - * Copy any live channel from \p src to the first channel of \p dst. + * Copy any live channel from \p src to the first channel of the result. */ - void - emit_uniformize(const dst_reg &dst, const src_reg &src) const + src_reg + emit_uniformize(const src_reg &src) const { const fs_builder ubld = exec_all(); - const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0); + const dst_reg dst = component(vgrf(src.type), 0); + + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)); - ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0), - src, component(chan_index, 0)); + return src_reg(dst); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 12cd4530c37..6c979334ac7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1378,7 +1378,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), fs_reg(stage_prog_data->binding_table.ubo_start)); - bld.emit_uniformize(surf_index, surf_index); + surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1673,7 +1673,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); bld.ADD(sampler_reg, src, fs_reg(sampler)); - bld.emit_uniformize(sampler_reg, sampler_reg); + sampler_reg = bld.emit_uniformize(sampler_reg); break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 36436511df0..7bf027a0306 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -293,8 +293,11 @@ public: void emit_lrp(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a); - /** Copy any live channel from \p src to the first channel of \p dst. */ - void emit_uniformize(const dst_reg &dst, const src_reg &src); + /** + * Copy any live channel from \p src to the first channel of the + * result. + */ + src_reg emit_uniformize(const src_reg &src); void emit_block_move(dst_reg *dst, src_reg *src, const struct glsl_type *type, brw_predicate predicate); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f351bf4075b..a6eee47fcb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1374,15 +1374,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, emit(pull); } -void -vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src) +src_reg +vec4_visitor::emit_uniformize(const src_reg &src) { const src_reg chan_index(this, glsl_type::uint_type); + const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type), + src.type); emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index)) ->force_writemask_all = true; emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index) ->force_writemask_all = true; + + return src_reg(dst); } void @@ -1826,7 +1830,7 @@ vec4_visitor::visit(ir_expression *ir) surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), op[0], src_reg(prog_data->base.binding_table.ubo_start))); - emit_uniformize(dst_reg(surf_index), surf_index); + surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -2522,11 +2526,9 @@ vec4_visitor::visit(ir_texture *ir) /* Emit code to evaluate the actual indexing expression */ nonconst_sampler_index->accept(this); - dst_reg temp(this, glsl_type::uint_type); - emit(ADD(temp, this->result, src_reg(sampler))); - emit_uniformize(temp, src_reg(temp)); - - sampler_reg = src_reg(temp); + src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), this->result, src_reg(sampler))); + sampler_reg = emit_uniformize(temp); } else { /* Single sampler, or constant array index; the indexing expression * is just an immediate. -- cgit v1.2.3