summary refs log tree commit diff
diff options
context:
space:
mode:
author Ian Romanick <ian.d.romanick@intel.com> 2022-07-12 15:32:01 -0700
committer Marge Bot <emma+marge@anholt.net> 2022-07-26 17:25:18 +0000
commit 349a040f684cc5c6b80d40a4edbefa410e91034d (patch)
tree e9496685b6a20935bb3257785f5c04707ef09a49
parent 5dab077824665b42c48acbe0e193a0786812672d (diff)
intel/fs: Make logical URB write instructions more like other logical instructions
The changes to fs_visitor::validate() helped track down a place where I initially forgot to convert a message to the new sources layout. This had caused a different validation failure in dEQP-GLES31.functional.tessellation.tesscoord.triangles_equal_spacing, but this was not detected until after SENDs were lowered.

Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown)
total instructions in shared programs: 19951145 -> 19951133 (<.01%)
instructions in affected programs: 2429 -> 2417 (-0.49%)
helped: 8 / HURT: 0
total cycles in shared programs: 858904152 -> 858862331 (<.01%)
cycles in affected programs: 5702652 -> 5660831 (-0.73%)
helped: 2138 / HURT: 1255

Broadwell
total cycles in shared programs: 904869459 -> 904835501 (<.01%)
cycles in affected programs: 7686744 -> 7652786 (-0.44%)
helped: 2861 / HURT: 2050

Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown)
Instructions in all programs: 141442369 -> 141442032 (-0.0%)
Instructions helped: 337
Cycles in all programs: 9099270231 -> 9099036492 (-0.0%)
Cycles helped: 40661
Cycles hurt: 28606

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17605>
-rw-r--r-- src/intel/compiler/brw_eu_defines.h 11
-rw-r--r-- src/intel/compiler/brw_fs.cpp 47
-rw-r--r-- src/intel/compiler/brw_fs_nir.cpp 77
-rw-r--r-- src/intel/compiler/brw_fs_validate.cpp 14
-rw-r--r-- src/intel/compiler/brw_fs_visitor.cpp 53
-rw-r--r-- src/intel/compiler/brw_lower_logical_sends.cpp 25
-rw-r--r-- src/intel/compiler/brw_mesh.cpp 70
7 files changed, 176 insertions, 121 deletions
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index da7c09c96f2..fecb3273d86 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -950,6 +950,17 @@ enum rt_logical_srcs {
RT_LOGICAL_NUM_SRCS
};
+enum urb_logical_srcs {
+ URB_LOGICAL_SRC_HANDLE,
+ URB_LOGICAL_SRC_PER_SLOT_OFFSETS,
+ URB_LOGICAL_SRC_CHANNEL_MASK,
+ /** Data to be written. BAD_FILE for reads. */
+ URB_LOGICAL_SRC_DATA,
+
+ URB_LOGICAL_NUM_SRCS
+};
+
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9e5ede1dc48..34a88ac89e2 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -863,6 +863,17 @@ fs_inst::components_read(unsigned i) const
return 1;
}
+ case SHADER_OPCODE_URB_WRITE_LOGICAL:
+ case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
+ case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
+ case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
+ if (i == URB_LOGICAL_SRC_DATA)
+ return mlen - 1 -
+ unsigned(src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) -
+ unsigned(src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE);
+ else
+ return 1;
+
default:
return 1;
}
@@ -891,10 +902,6 @@ fs_inst::size_read(int arg) const
break;
case FS_OPCODE_FB_READ:
- case SHADER_OPCODE_URB_WRITE_LOGICAL:
- case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
- case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
- case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
case SHADER_OPCODE_URB_READ_LOGICAL:
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
@@ -1546,17 +1553,17 @@ fs_visitor::emit_gs_thread_end()
break;
}
}
- fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
- inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, hdr);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
+ srcs, ARRAY_SIZE(srcs));
inst->mlen = 1;
} else {
- fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
- sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
- sources[1] = this->final_gs_vertex_count;
- abld.LOAD_PAYLOAD(payload, sources, 2, 2);
- inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count;
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
+ srcs, ARRAY_SIZE(srcs));
inst->mlen = 2;
}
inst->eot = true;
@@ -6676,16 +6683,12 @@ fs_visitor::run_tcs()
}
/* Emit EOT write; set TR DS Cache bit */
- fs_reg srcs[3] = {
- fs_reg(get_tcs_output_urb_handle()),
- fs_reg(brw_imm_ud(WRITEMASK_X << 16)),
- fs_reg(brw_imm_ud(0)),
- };
- fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
- bld.LOAD_PAYLOAD(payload, srcs, 3, 2);
-
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = get_tcs_output_urb_handle();
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
+ srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
- bld.null_reg_ud(), payload);
+ reg_undef, srcs, ARRAY_SIZE(srcs));
inst->mlen = 3;
inst->eot = true;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 87aff871e78..35a50e838a8 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2341,27 +2341,27 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
}
/* Store the control data bits in the message payload and send it. */
- unsigned mlen = 2;
- if (channel_mask.file != BAD_FILE)
- mlen += 4; /* channel masks, plus 3 extra copies of the data */
- if (per_slot_offset.file != BAD_FILE)
- mlen++;
-
- fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen);
- unsigned i = 0;
- sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
- if (per_slot_offset.file != BAD_FILE)
- sources[i++] = per_slot_offset;
- if (channel_mask.file != BAD_FILE)
- sources[i++] = channel_mask;
- while (i < mlen) {
- sources[i++] = this->control_data_bits;
- }
-
- abld.LOAD_PAYLOAD(payload, sources, mlen, mlen);
- fs_inst *inst = abld.emit(opcode, reg_undef, payload);
- inst->mlen = mlen;
+ const unsigned header_size = 1 + unsigned(channel_mask.file != BAD_FILE) +
+ unsigned(per_slot_offset.file != BAD_FILE);
+
+ /* If there are channel masks, add 3 extra copies of the data. */
+ const unsigned length = 1 + 3 * unsigned(channel_mask.file != BAD_FILE);
+
+ fs_reg sources[4];
+
+ for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
+ sources[i] = this->control_data_bits;
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask;
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
+
+ fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
+ inst->mlen = header_size + length;
/* We need to increment Global Offset by 256-bits to make room for
* Broadwell's extra "Vertex Count" payload at the beginning of the
* URB entry. Since this is an OWord message, Global Offset is counted
@@ -3046,15 +3046,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
fs_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
unsigned mask = instr->const_index[1];
- unsigned header_regs = 0;
- struct brw_reg output_handles = get_tcs_output_urb_handle();
-
- fs_reg srcs[7];
- srcs[header_regs++] = output_handles;
-
- if (indirect_offset.file != BAD_FILE) {
- srcs[header_regs++] = indirect_offset;
- }
if (mask == 0)
break;
@@ -3068,8 +3059,9 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned first_component = nir_intrinsic_component(instr);
mask = mask << first_component;
+ fs_reg mask_reg;
if (mask != WRITEMASK_XYZW) {
- srcs[header_regs++] = brw_imm_ud(mask << 16);
+ mask_reg = brw_imm_ud(mask << 16);
opcode = indirect_offset.file != BAD_FILE ?
SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL :
SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
@@ -3079,21 +3071,30 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
SHADER_OPCODE_URB_WRITE_LOGICAL;
}
+ fs_reg sources[4];
+
for (unsigned i = 0; i < num_components; i++) {
if (!(mask & (1 << (i + first_component))))
continue;
- srcs[header_regs + i + first_component] = offset(value, bld, i);
+ sources[i + first_component] = offset(value, bld, i);
}
- unsigned mlen = header_regs + num_components + first_component;
- fs_reg payload =
- bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
- bld.LOAD_PAYLOAD(payload, srcs, mlen, header_regs);
+ unsigned header_size = 1 + unsigned(indirect_offset.file != BAD_FILE) +
+ unsigned(mask != WRITEMASK_XYZW);
+ const unsigned length = num_components + first_component;
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = get_tcs_output_urb_handle();
+ srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg;
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
- fs_inst *inst = bld.emit(opcode, bld.null_reg_ud(), payload);
+ fs_inst *inst = bld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
inst->offset = imm_offset;
- inst->mlen = mlen;
+ inst->mlen = header_size + length;
break;
}
diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index 75a794fd794..3fb071086f6 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -43,6 +43,20 @@ fs_visitor::validate()
{
#ifndef NDEBUG
foreach_block_and_inst (block, fs_inst, inst, cfg) {
+ if (inst->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
+ const unsigned header_size = 1 +
+ unsigned(inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) +
+ unsigned(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE);
+
+ unsigned data_size = 0;
+ for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++) {
+ fsv_assert(type_sz(offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j).type) == 4);
+ data_size++;
+ }
+
+ fsv_assert(header_size + data_size == inst->mlen);
+ }
+
if (inst->dst.file == VGRF) {
fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
alloc.sizes[inst->dst.nr]);
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 1a6c42f2715..3ced049d101 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -935,22 +935,15 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
if (length == 8 || (length > 0 && slot == last_slot))
flush = true;
if (flush) {
- fs_reg *payload_sources =
- ralloc_array(mem_ctx, fs_reg, length + header_size);
- fs_reg payload = fs_reg(VGRF, alloc.allocate(length + header_size),
- BRW_REGISTER_TYPE_F);
- payload_sources[0] = urb_handle;
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
- if (opcode == SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL)
- payload_sources[1] = per_slot_offsets;
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+ srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offsets;
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
- memcpy(&payload_sources[header_size], sources,
- length * sizeof sources[0]);
-
- abld.LOAD_PAYLOAD(payload, payload_sources, length + header_size,
- header_size);
-
- fs_inst *inst = abld.emit(opcode, reg_undef, payload);
+ fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
/* For ICL WA 1805992985 one needs additional write in the end. */
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL)
@@ -985,10 +978,17 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
if (stage == MESA_SHADER_GEOMETRY)
return;
- fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
- bld.exec_all().MOV(payload, urb_handle);
+ fs_reg uniform_urb_handle = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+
+ bld.exec_all().MOV(uniform_urb_handle, urb_handle);
- fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, payload);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
+ srcs[URB_LOGICAL_SRC_DATA] = payload;
+
+ fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
+ srcs, ARRAY_SIZE(srcs));
inst->eot = true;
inst->mlen = 2;
inst->offset = 1;
@@ -1002,14 +1002,16 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
* all 8 lanes must valid.
*/
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL) {
- fs_reg payload = fs_reg(VGRF, alloc.allocate(6), BRW_REGISTER_TYPE_UD);
+ fs_reg uniform_urb_handle = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg uniform_mask = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(VGRF, alloc.allocate(4), BRW_REGISTER_TYPE_UD);
/* Workaround requires all 8 channels (lanes) to be valid. This is
* understood to mean they all need to be alive. First trick is to find
* a live channel and copy its urb handle for all the other channels to
* make sure all handles are valid.
*/
- bld.exec_all().MOV(payload, bld.emit_uniformize(urb_handle));
+ bld.exec_all().MOV(uniform_urb_handle, bld.emit_uniformize(urb_handle));
/* Second trick is to use masked URB write where one can tell the HW to
* actually write data only for selected channels even though all are
@@ -1025,14 +1027,19 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
* 4 slots data. All are explicitly zeros in order to to keep the MBZ
* area written as zeros.
*/
- bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0x10000u));
+ bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x10000u));
+ bld.exec_all().MOV(offset(payload, bld, 0), brw_imm_ud(0u));
+ bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 3), brw_imm_ud(0u));
- bld.exec_all().MOV(offset(payload, bld, 4), brw_imm_ud(0u));
- bld.exec_all().MOV(offset(payload, bld, 5), brw_imm_ud(0u));
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
+ srcs[URB_LOGICAL_SRC_DATA] = payload;
fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
- reg_undef, payload);
+ reg_undef, srcs, ARRAY_SIZE(srcs));
inst->eot = true;
inst->mlen = 6;
inst->offset = 0;
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index e1845a4fc34..0ebc9984b1e 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -73,8 +73,27 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst,
assert(inst->header_size == 0);
+ fs_reg *payload_sources = new fs_reg[inst->mlen];
+ fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(inst->mlen),
+ BRW_REGISTER_TYPE_F);
+
+ unsigned header_size = 0;
+ payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_HANDLE];
+ if (per_slot_present)
+ payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
+
+ if (channel_mask_present)
+ payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
+
+ for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++)
+ payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j);
+
+ bld.LOAD_PAYLOAD(payload, payload_sources, inst->mlen, header_size);
+
+ delete [] payload_sources;
+
inst->opcode = SHADER_OPCODE_SEND;
- inst->header_size = 1;
+ inst->header_size = header_size;
inst->dst = brw_null_reg();
inst->sfid = BRW_SFID_URB;
@@ -88,13 +107,11 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst,
inst->ex_mlen = 0;
inst->send_has_side_effects = true;
- fs_reg tmp = inst->src[0];
-
inst->resize_sources(4);
inst->src[0] = brw_imm_ud(0); /* desc */
inst->src[1] = brw_imm_ud(0); /* ex_desc */
- inst->src[2] = tmp;
+ inst->src[2] = payload;
inst->src[3] = brw_null_reg();
}
diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp
index d9828923c9e..6a8872cebe9 100644
--- a/src/intel/compiler/brw_mesh.cpp
+++ b/src/intel/compiler/brw_mesh.cpp
@@ -892,25 +892,25 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
fs_builder bld8 = bld.group(8, q);
- fs_reg payload_srcs[6];
- unsigned p = 0;
-
- payload_srcs[p++] = urb_handle;
- payload_srcs[p++] = brw_imm_ud(first_mask << 16);
- const unsigned header_size = p;
+ fs_reg payload_srcs[4];
+ unsigned length = 0;
for (unsigned i = 0; i < comp_shift; i++)
- payload_srcs[p++] = reg_undef;
+ payload_srcs[length++] = reg_undef;
for (unsigned c = 0; c < first_comps; c++)
- payload_srcs[p++] = quarter(offset(src, bld, c), q);
+ payload_srcs[length++] = quarter(offset(src, bld, c), q);
- fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
- bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(first_mask << 16);
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
- reg_undef, payload);
- inst->mlen = p;
+ reg_undef, srcs, ARRAY_SIZE(srcs));
+ inst->mlen = 2 + length;
inst->offset = urb_global_offset;
assert(inst->offset < 2048);
}
@@ -923,22 +923,22 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
fs_builder bld8 = bld.group(8, q);
- fs_reg payload_srcs[6];
- unsigned p = 0;
-
- payload_srcs[p++] = urb_handle;
- payload_srcs[p++] = brw_imm_ud(second_mask << 16);
- const unsigned header_size = p;
+ fs_reg payload_srcs[4];
+ unsigned length = 0;
for (unsigned c = 0; c < second_comps; c++)
- payload_srcs[p++] = quarter(offset(src, bld, c + first_comps), q);
+ payload_srcs[length++] = quarter(offset(src, bld, c + first_comps), q);
- fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, p);
- bld8.LOAD_PAYLOAD(payload, payload_srcs, p, header_size);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(second_mask << 16);
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
- reg_undef, payload);
- inst->mlen = p;
+ reg_undef, srcs, ARRAY_SIZE(srcs));
+ inst->mlen = 2 + length;
inst->offset = urb_global_offset;
assert(inst->offset < 2048);
}
@@ -988,21 +988,23 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
bld8.SHR(off, off, brw_imm_ud(2));
- fs_reg payload_srcs[7];
- int x = 0;
- payload_srcs[x++] = urb_handle;
- payload_srcs[x++] = off;
- payload_srcs[x++] = mask;
+ fs_reg payload_srcs[4];
+ unsigned length = 0;
for (unsigned j = 0; j < 4; j++)
- payload_srcs[x++] = quarter(src_comp, q);
+ payload_srcs[length++] = quarter(src_comp, q);
- fs_reg payload = bld8.vgrf(BRW_REGISTER_TYPE_UD, x);
- bld8.LOAD_PAYLOAD(payload, payload_srcs, x, 3);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+ srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length),
+ BRW_REGISTER_TYPE_F);
+ bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
- fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL,
- reg_undef, payload);
- inst->mlen = x;
+ fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
+ reg_undef, srcs, ARRAY_SIZE(srcs));
+ inst->mlen = 3 + length;
inst->offset = 0;
}
}