summary | refs | log | tree | commit | diff
path: root/src/amd
diff options
context:
space:
mode:
author Daniel Schürmann <daniel@schuermann.dev> 2020-01-16 16:54:35 +0100
committer Marge Bot <eric+marge@anholt.net> 2020-01-29 18:45:23 +0000
commit 71440ba0f5512fe455be66ca48b253ecc37478a9 (patch)
tree ae9619e671d1e87a3bbef4d002d7bcc75b32fee5 /src/amd
parent 8548fe19f03ecaee711ed9041be3dc05c7c22e56 (diff)
aco: reorder VMEM operands in ACO IR
For all VMEM instructions, the resource constant is now in operands[0]. For MIMG instructions, the sampler shares operands[1] with write data in case this instruction writes memory. Moving the VADDR to be the last operand for MIMG is the first step to support Navi NSA encoding.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3602>
Diffstat (limited to 'src/amd')
-rw-r--r-- src/amd/compiler/aco_assembler.cpp 20
-rw-r--r-- src/amd/compiler/aco_builder_h.py 2
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp 17
-rw-r--r-- src/amd/compiler/aco_insert_waitcnt.cpp 11
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 146
-rw-r--r-- src/amd/compiler/aco_ir.h 17
-rw-r--r-- src/amd/compiler/aco_optimizer.cpp 25
-rw-r--r-- src/amd/compiler/aco_register_allocation.cpp 11
-rw-r--r-- src/amd/compiler/aco_scheduler.cpp 2
-rw-r--r-- src/amd/compiler/aco_spill.cpp 8
-rw-r--r-- src/amd/compiler/aco_validate.cpp 26
11 files changed, 153 insertions, 132 deletions
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 94c41173d6e..7683c8f45af 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -329,10 +329,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mubuf->tfe ? 1 : 0) << 23;
- encoding |= (instr->operands[1].physReg() >> 2) << 16;
+ encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
- encoding |= (0xFF & instr->operands[0].physReg());
+ encoding |= (0xFF & instr->operands[1].physReg());
out.push_back(encoding);
break;
}
@@ -362,10 +362,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mtbuf->tfe ? 1 : 0) << 23;
encoding |= (mtbuf->slc ? 1 : 0) << 22;
- encoding |= (instr->operands[1].physReg() >> 2) << 16;
+ encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
- encoding |= (0xFF & instr->operands[0].physReg());
+ encoding |= (0xFF & instr->operands[1].physReg());
if (ctx.chip_class >= GFX10) {
encoding |= (((opcode & 0x08) >> 4) << 21); /* MSB of 4-bit OPCODE */
@@ -395,15 +395,15 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
encoding |= (0xF & mimg->dmask) << 8;
out.push_back(encoding);
- encoding = (0xFF & instr->operands[0].physReg()); /* VADDR */
+ encoding = (0xFF & instr->operands[2].physReg()); /* VADDR */
if (!instr->definitions.empty()) {
encoding |= (0xFF & instr->definitions[0].physReg()) << 8; /* VDATA */
- } else if (instr->operands.size() == 4) {
- encoding |= (0xFF & instr->operands[3].physReg()) << 8; /* VDATA */
+ } else if (instr->operands[1].regClass().type() == RegType::vgpr) {
+ encoding |= (0xFF & instr->operands[1].physReg()) << 8; /* VDATA */
}
- encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 16; /* T# (resource) */
- if (instr->operands.size() > 2)
- encoding |= (0x1F & (instr->operands[2].physReg() >> 2)) << 21; /* sampler */
+ encoding |= (0x1F & (instr->operands[0].physReg() >> 2)) << 16; /* T# (resource) */
+ if (instr->operands[1].regClass().type() == RegType::sgpr)
+ encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
assert(!mimg->d16 || ctx.chip_class >= GFX9);
encoding |= mimg->d16 ? 1 << 15 : 0;
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index 4e1d6f72b63..97c03ac8adf 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -473,7 +473,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (0, 3), (0, 4)]),
("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]),
("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]),
- ("mimg", [Format.MIMG], 'MIMG_instruction', [(0, 4), (1, 3), (0, 3), (1, 2)]), #TODO(pendingchaos): less shapes?
+ ("mimg", [Format.MIMG], 'MIMG_instruction', [(0, 3), (1, 3)]),
("exp", [Format.EXP], 'Export_instruction', [(0, 4)]),
("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([0], [0, 1])),
("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 605ed8a2081..9ff3d580736 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -325,9 +325,9 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
pred->operands[2].physReg() >= 128;
/* MIMG store with a 128-bit T# with more than two bits set in dmask (making it a >64-bit store) */
bool consider_mimg = pred->format == Format::MIMG &&
- pred->operands.size() == 4 &&
- pred->operands[3].size() > 2 &&
- pred->operands[1].size() != 8;
+ pred->operands[1].regClass().type() == RegType::vgpr &&
+ pred->operands[1].size() > 2 &&
+ pred->operands[0].size() == 4;
/* FLAT/GLOBAL/SCRATCH store with >64-bit data */
bool consider_flat = (pred->isFlatOrGlobal() || pred->format == Format::SCRATCH) &&
pred->operands.size() == 3 &&
@@ -376,6 +376,7 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
for (int pred_idx = new_idx - 1; pred_idx >= 0 && pred_idx >= new_idx - 5; pred_idx--) {
aco_ptr<Instruction>& pred = new_instructions[pred_idx];
+ // TODO: break if something else writes the SGPR
if (!(pred->isVALU() && VALU_writes_sgpr(pred)))
continue;
@@ -383,16 +384,10 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
if (def.physReg() > 102)
continue;
- if (instr->operands.size() > 1 &&
- regs_intersect(instr->operands[1].physReg(), instr->operands[1].size(),
- def.physReg(), def.size())) {
+ for (const Operand& op : instr->operands) {
+ if (regs_intersect(op.physReg(), op.size(), def.physReg(), def.size()))
return 5 + pred_idx - new_idx + 1;
- }
- if (instr->operands.size() > 2 &&
- regs_intersect(instr->operands[2].physReg(), instr->operands[2].size(),
- def.physReg(), def.size())) {
- return 5 + pred_idx - new_idx + 1;
}
}
}
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 8e1b64bfcd0..5ec9636752d 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -689,11 +689,20 @@ void gen(Instruction* instr, wait_ctx& ctx)
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], ev);
- if (instr->operands.size() == 4 && ctx.chip_class == GFX6) {
+ if (ctx.chip_class == GFX6 &&
+ instr->format != Format::MIMG &&
+ instr->operands.size() == 4) {
ctx.exp_cnt++;
update_counters(ctx, event_vmem_gpr_lock);
insert_wait_entry(ctx, instr->operands[3], event_vmem_gpr_lock);
+ } else if (ctx.chip_class == GFX6 &&
+ instr->format == Format::MIMG &&
+ instr->operands[1].regClass().type() == RegType::vgpr) {
+ ctx.exp_cnt++;
+ update_counters(ctx, event_vmem_gpr_lock);
+ insert_wait_entry(ctx, instr->operands[1], event_vmem_gpr_lock);
}
+
break;
}
case Format::SOPP: {
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 6734b9c98d3..8d08c416ef0 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2929,8 +2929,8 @@ void visit_store_vsgs_output(isel_context *ctx, nir_intrinsic_instr *instr)
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
- mtbuf->operands[0] = vaddr_offset;
- mtbuf->operands[1] = Operand(esgs_ring);
+ mtbuf->operands[0] = Operand(esgs_ring);
+ mtbuf->operands[1] = vaddr_offset;
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->es2gs_offset));
mtbuf->operands[3] = Operand(elem);
mtbuf->offen = !vaddr_offset.isUndefined();
@@ -3288,12 +3288,12 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
if (use_mubuf) {
Instruction *mubuf = bld.mubuf(opcode,
- Definition(fetch_dst), fetch_index, list, soffset,
+ Definition(fetch_dst), list, fetch_index, soffset,
fetch_offset, false, true).instr;
static_cast<MUBUF_instruction*>(mubuf)->can_reorder = true;
} else {
Instruction *mtbuf = bld.mtbuf(opcode,
- Definition(fetch_dst), fetch_index, list, soffset,
+ Definition(fetch_dst), list, fetch_index, soffset,
fetch_dfmt, nfmt, fetch_offset, false, true).instr;
static_cast<MTBUF_instruction*>(mtbuf)->can_reorder = true;
}
@@ -3487,8 +3487,8 @@ void visit_load_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr)
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = bld.def(v1);
subelems[j] = mubuf->definitions[0].getTemp();
- mubuf->operands[0] = Operand(offset);
- mubuf->operands[1] = Operand(esgs_ring);
+ mubuf->operands[0] = Operand(esgs_ring);
+ mubuf->operands[1] = Operand(offset);
mubuf->operands[2] = Operand(soffset);
mubuf->offen = true;
mubuf->offset = const_offset % 4096u;
@@ -3616,8 +3616,8 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
lower = bld.tmp(v4);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = Definition(lower);
- mubuf->operands[0] = vaddr;
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = glc;
@@ -3651,8 +3651,8 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
unreachable("Load SSBO not implemented for this size.");
}
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
- mubuf->operands[0] = vaddr;
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = glc;
@@ -4228,9 +4228,10 @@ static Temp adjust_sample_index_using_fmask(isel_context *ctx, bool da, Temp coo
? ac_get_sampler_dim(ctx->options->chip_class, GLSL_SAMPLER_DIM_2D, da)
: 0;
- aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(aco_opcode::image_load, Format::MIMG, 2, 1)};
- load->operands[0] = Operand(coords);
- load->operands[1] = Operand(fmask_desc_ptr);
+ aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(aco_opcode::image_load, Format::MIMG, 3, 1)};
+ load->operands[0] = Operand(fmask_desc_ptr);
+ load->operands[1] = Operand(s4); /* no sampler */
+ load->operands[2] = Operand(coords);
load->definitions[0] = Definition(fmask);
load->glc = false;
load->dlc = false;
@@ -4374,8 +4375,8 @@ void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable(">4 channel buffer image load");
}
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3, 1)};
- load->operands[0] = Operand(vindex);
- load->operands[1] = Operand(rsrc);
+ load->operands[0] = Operand(rsrc);
+ load->operands[1] = Operand(vindex);
load->operands[2] = Operand((uint32_t) 0);
Temp tmp;
if (num_channels == instr->dest.ssa.num_components && dst.type() == RegType::vgpr)
@@ -4407,9 +4408,10 @@ void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
aco_opcode opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
- aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 2, 1)};
- load->operands[0] = Operand(coords);
- load->operands[1] = Operand(resource);
+ aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1)};
+ load->operands[0] = Operand(resource);
+ load->operands[1] = Operand(s4); /* no sampler */
+ load->operands[2] = Operand(coords);
load->definitions[0] = Definition(tmp);
load->glc = var->data.access & (ACCESS_VOLATILE | ACCESS_COHERENT) ? 1 : 0;
load->dlc = load->glc && ctx->options->chip_class >= GFX10;
@@ -4455,8 +4457,8 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable(">4 channel buffer image store");
}
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
- store->operands[0] = Operand(vindex);
- store->operands[1] = Operand(rsrc);
+ store->operands[0] = Operand(rsrc);
+ store->operands[1] = Operand(vindex);
store->operands[2] = Operand((uint32_t) 0);
store->operands[3] = Operand(data);
store->idxen = true;
@@ -4476,11 +4478,10 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr)
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip;
- aco_ptr<MIMG_instruction> store{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 4, 0)};
- store->operands[0] = Operand(coords);
- store->operands[1] = Operand(resource);
- store->operands[2] = Operand(s4);
- store->operands[3] = Operand(data);
+ aco_ptr<MIMG_instruction> store{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 0)};
+ store->operands[0] = Operand(resource);
+ store->operands[1] = Operand(data);
+ store->operands[2] = Operand(coords);
store->glc = glc;
store->dlc = false;
store->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
@@ -4572,8 +4573,8 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_BUFFER, nullptr, true, true);
//assert(ctx->options->chip_class < GFX9 && "GFX9 stride size workaround not yet implemented.");
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)};
- mubuf->operands[0] = Operand(vindex);
- mubuf->operands[1] = Operand(resource);
+ mubuf->operands[0] = Operand(resource);
+ mubuf->operands[1] = Operand(vindex);
mubuf->operands[2] = Operand((uint32_t)0);
mubuf->operands[3] = Operand(data);
if (return_previous)
@@ -4591,11 +4592,10 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
Temp coords = get_image_coords(ctx, instr, type);
Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_IMAGE, nullptr, true, true);
- aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(image_op, Format::MIMG, 4, return_previous ? 1 : 0)};
- mimg->operands[0] = Operand(coords);
- mimg->operands[1] = Operand(resource);
- mimg->operands[2] = Operand(s4); /* no sampler */
- mimg->operands[3] = Operand(data);
+ aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(image_op, Format::MIMG, 3, return_previous ? 1 : 0)};
+ mimg->operands[0] = Operand(resource);
+ mimg->operands[1] = Operand(data);
+ mimg->operands[2] = Operand(coords);
if (return_previous)
mimg->definitions[0] = Definition(dst);
mimg->glc = return_previous;
@@ -4661,9 +4661,10 @@ void visit_image_size(isel_context *ctx, nir_intrinsic_instr *instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
- aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1)};
- mimg->operands[0] = Operand(lod);
- mimg->operands[1] = Operand(resource);
+ aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1)};
+ mimg->operands[0] = Operand(resource);
+ mimg->operands[1] = Operand(s4); /* no sampler */
+ mimg->operands[2] = Operand(lod);
uint8_t& dmask = mimg->dmask;
mimg->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
@@ -4823,8 +4824,8 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
}
} else {
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(vmem_op, Format::MUBUF, 4, 0)};
- store->operands[0] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
- store->operands[1] = Operand(rsrc);
+ store->operands[0] = Operand(rsrc);
+ store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t) 0);
store->operands[3] = Operand(write_data);
store->offset = start * elem_size_bytes;
@@ -4912,8 +4913,8 @@ void visit_atomic_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
}
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
- mubuf->operands[0] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t) 0);
mubuf->operands[3] = Operand(data);
if (return_previous)
@@ -5021,8 +5022,8 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr)
Temp rsrc = get_gfx6_global_rsrc(bld, addr);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
- mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->glc = glc;
mubuf->dlc = false;
@@ -5202,8 +5203,8 @@ void visit_store_global(isel_context *ctx, nir_intrinsic_instr *instr)
Temp rsrc = get_gfx6_global_rsrc(bld, addr);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
- mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->operands[3] = Operand(write_data);
mubuf->glc = glc;
@@ -5360,8 +5361,8 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
- mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
- mubuf->operands[1] = Operand(rsrc);
+ mubuf->operands[0] = Operand(rsrc);
+ mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->operands[3] = Operand(data);
if (return_previous)
@@ -5589,12 +5590,12 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
case 8: {
std::array<Temp,NIR_MAX_VEC_COMPONENTS> elems;
Temp lower = bld.mubuf(aco_opcode::buffer_load_dwordx4,
- bld.def(v4), offset, rsrc,
+ bld.def(v4), rsrc, offset,
ctx->program->scratch_offset, 0, true);
Temp upper = bld.mubuf(dst.size() == 6 ? aco_opcode::buffer_load_dwordx2 :
aco_opcode::buffer_load_dwordx4,
dst.size() == 6 ? bld.def(v2) : bld.def(v4),
- offset, rsrc, ctx->program->scratch_offset, 16, true);
+ rsrc, offset, ctx->program->scratch_offset, 16, true);
emit_split_vector(ctx, lower, 2);
elems[0] = emit_extract_vector(ctx, lower, 0, v2);
elems[1] = emit_extract_vector(ctx, lower, 1, v2);
@@ -5619,7 +5620,7 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
unreachable("Wrong dst size for nir_intrinsic_load_scratch");
}
- bld.mubuf(op, Definition(dst), offset, rsrc, ctx->program->scratch_offset, 0, true);
+ bld.mubuf(op, Definition(dst), rsrc, offset, ctx->program->scratch_offset, 0, true);
emit_split_vector(ctx, dst, instr->num_components);
}
@@ -5680,7 +5681,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
unreachable("Invalid data size for nir_intrinsic_store_scratch.");
}
- bld.mubuf(op, offset, rsrc, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
+ bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
}
}
@@ -5784,8 +5785,8 @@ void visit_emit_vertex_with_counter(isel_context *ctx, nir_intrinsic_instr *inst
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
- mtbuf->operands[0] = vaddr_offset;
- mtbuf->operands[1] = Operand(gsvs_ring);
+ mtbuf->operands[0] = Operand(gsvs_ring);
+ mtbuf->operands[1] = vaddr_offset;
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->gs2vs_offset));
mtbuf->operands[3] = Operand(ctx->outputs.outputs[i][j]);
mtbuf->offen = !vaddr_offset.isUndefined();
@@ -6110,8 +6111,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dwordx2, Format::MUBUF, 3, 1)};
load->definitions[0] = Definition(sample_pos);
- load->operands[0] = Operand(addr);
- load->operands[1] = Operand(rsrc);
+ load->operands[0] = Operand(rsrc);
+ load->operands[1] = Operand(addr);
load->operands[2] = Operand(0u);
load->offset = sample_pos_offset;
load->offen = 0;
@@ -7340,9 +7341,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
if (tmp_dst.id() == dst.id() && div_by_6)
tmp_dst = bld.tmp(tmp_dst.regClass());
- tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1));
- tex->operands[0] = Operand(as_vgpr(ctx,lod));
- tex->operands[1] = Operand(resource);
+ tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1));
+ tex->operands[0] = Operand(resource);
+ tex->operands[1] = Operand(s4); /* no sampler */
+ tex->operands[2] = Operand(as_vgpr(ctx,lod));
if (ctx->options->chip_class == GFX9 &&
instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
@@ -7380,9 +7382,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
Temp tg4_compare_cube_wa64 = Temp();
if (tg4_integer_workarounds) {
- tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1));
- tex->operands[0] = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
- tex->operands[1] = Operand(resource);
+ tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1));
+ tex->operands[0] = Operand(resource);
+ tex->operands[1] = Operand(s4); /* no sampler */
+ tex->operands[2] = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
tex->dim = dim;
tex->dmask = 0x3;
tex->da = da;
@@ -7537,8 +7540,8 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
tmp_dst = bld.tmp(RegType::vgpr, last_bit);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
- mubuf->operands[0] = Operand(coords);
- mubuf->operands[1] = Operand(resource);
+ mubuf->operands[0] = Operand(resource);
+ mubuf->operands[1] = Operand(coords);
mubuf->operands[2] = Operand((uint32_t) 0);
mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true;
@@ -7556,9 +7559,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->op == nir_texop_fragment_fetch ||
instr->op == nir_texop_fragment_mask_fetch) {
aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ? aco_opcode::image_load : aco_opcode::image_load_mip;
- tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 2, 1));
- tex->operands[0] = Operand(arg);
- tex->operands[1] = Operand(resource);
+ tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3, 1));
+ tex->operands[0] = Operand(resource);
+ tex->operands[1] = Operand(s4); /* no sampler */
+ tex->operands[2] = Operand(arg);
tex->dim = dim;
tex->dmask = dmask;
tex->unrm = true;
@@ -7644,9 +7648,9 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
}
tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1));
- tex->operands[0] = Operand(arg);
- tex->operands[1] = Operand(resource);
- tex->operands[2] = Operand(sampler);
+ tex->operands[0] = Operand(resource);
+ tex->operands[1] = Operand(sampler);
+ tex->operands[2] = Operand(arg);
tex->dim = dim;
tex->dmask = dmask;
tex->da = da;
@@ -8753,8 +8757,8 @@ static void emit_stream_output(isel_context *ctx,
}
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
- store->operands[0] = Operand(so_write_offset[buf]);
- store->operands[1] = Operand(so_buffers[buf]);
+ store->operands[0] = Operand(so_buffers[buf]);
+ store->operands[1] = Operand(so_write_offset[buf]);
store->operands[2] = Operand((uint32_t) 0);
store->operands[3] = Operand(write_data);
if (offset > 4095) {
@@ -9118,8 +9122,8 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = bld.def(v1);
- mubuf->operands[0] = Operand(voffset);
- mubuf->operands[1] = Operand(gsvs_ring);
+ mubuf->operands[0] = Operand(gsvs_ring);
+ mubuf->operands[1] = Operand(voffset);
mubuf->operands[2] = Operand(0u);
mubuf->offen = true;
mubuf->offset = const_offset;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 3f38e6aadae..1ccaf2a0158 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -781,8 +781,8 @@ struct DS_instruction : public Instruction {
/**
* Vector Memory Untyped-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
@@ -804,8 +804,8 @@ struct MUBUF_instruction : public Instruction {
/**
* Vector Memory Typed-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
@@ -827,10 +827,11 @@ struct MTBUF_instruction : public Instruction {
/**
* Vector Memory Image Instructions
- * Operand(0): VADDR - Address source. Can carry an offset or an index.
- * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
- * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
- * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
+ * Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
+ * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
+ * or VDATA - Vector GPR for write data.
+ * Operand(2): VADDR - Address source. Can carry an offset or an index.
+ * Definition(0): VDATA - Vector GPR for read result.
*
*/
struct MIMG_instruction : public Instruction {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 9e606c880f4..9ef94d4f697 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -528,9 +528,9 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
/* only covers special cases */
-bool can_accept_constant(aco_ptr<Instruction>& instr, unsigned operand)
+bool alu_can_accept_constant(aco_opcode opcode, unsigned operand)
{
- switch (instr->opcode) {
+ switch (opcode) {
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_mac_f32:
case aco_opcode::v_writelane_b32:
@@ -547,12 +547,6 @@ bool can_accept_constant(aco_ptr<Instruction>& instr, unsigned operand)
case aco_opcode::v_readfirstlane_b32:
return operand != 0;
default:
- if ((instr->format == Format::MUBUF ||
- instr->format == Format::MIMG) &&
- instr->definitions.size() == 1 &&
- instr->operands.size() == 4) {
- return operand != 3;
- }
return true;
}
}
@@ -719,7 +713,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
break;
}
}
- if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) {
+ if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) &&
+ !instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) {
instr->operands[i] = get_constant_op(ctx, info.val, info.is_constant_64bit());
continue;
}
@@ -754,7 +749,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
continue;
}
- if ((info.is_constant() || info.is_constant_64bit()) && can_accept_constant(instr, i)) {
+ if ((info.is_constant() || info.is_constant_64bit()) && alu_can_accept_constant(instr->opcode, i)) {
Operand op = get_constant_op(ctx, info.val, info.is_constant_64bit());
perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get());
if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) {
@@ -780,9 +775,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
while (info.is_temp())
info = ctx.info[info.temp.id()];
- if (mubuf->offen && i == 0 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) {
+ if (mubuf->offen && i == 1 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) {
assert(!mubuf->idxen);
- instr->operands[i] = Operand(v1);
+ instr->operands[1] = Operand(v1);
mubuf->offset += info.val;
mubuf->offen = false;
continue;
@@ -790,9 +785,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
instr->operands[2] = Operand((uint32_t) 0);
mubuf->offset += info.val;
continue;
- } else if (mubuf->offen && i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) {
+ } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) {
assert(!mubuf->idxen);
- instr->operands[i].setTemp(base);
+ instr->operands[1].setTemp(base);
mubuf->offset += offset;
continue;
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && mubuf->offset + offset < 4096) {
@@ -2698,7 +2693,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
continue;
}
- if (!can_accept_constant(instr, i))
+ if (!alu_can_accept_constant(instr->opcode, i))
continue;
if (ctx.uses[op.tempId()] < literal_uses) {
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 8370effdbc3..55c41dcdb06 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1532,11 +1532,14 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
} else if (instr->opcode == aco_opcode::s_addk_i32 ||
instr->opcode == aco_opcode::s_mulk_i32) {
instr->definitions[0].setFixed(instr->operands[0].physReg());
- } else if ((instr->format == Format::MUBUF ||
- instr->format == Format::MIMG) &&
- instr->definitions.size() == 1 &&
- instr->operands.size() == 4) {
+ } else if (instr->format == Format::MUBUF &&
+ instr->definitions.size() == 1 &&
+ instr->operands.size() == 4) {
instr->definitions[0].setFixed(instr->operands[3].physReg());
+ } else if (instr->format == Format::MIMG &&
+ instr->definitions.size() == 1 &&
+ instr->operands[1].regClass() == instr->definitions[0].regClass()) {
+ instr->definitions[0].setFixed(instr->operands[1].physReg());
}
ctx.defs_done.reset();
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index d5f2d913a65..5634a55766c 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -548,7 +548,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
if (current->isVMEM() == candidate->isVMEM()) {
bool same_resource = true;
if (current->isVMEM())
- same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
+ same_resource = candidate->operands[0].tempId() == current->operands[0].tempId();
bool can_reorder = can_reorder_vmem || can_reorder_candidate;
int grab_dist = clause_insert_idx - candidate_idx;
/* We can't easily tell how much this will decrease the def-to-use
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index bfa3c266a76..dfcdab2f003 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1575,9 +1575,9 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
split->definitions[i] = bld.def(v1);
bld.insert(split);
for (unsigned i = 0; i < temp.size(); i++)
- bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
+ bld.mubuf(opcode, scratch_rsrc, Operand(), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
} else {
- bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
+ bld.mubuf(opcode, scratch_rsrc, Operand(), scratch_offset, temp, offset, false);
}
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
@@ -1641,11 +1641,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
for (unsigned i = 0; i < def.size(); i++) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
- bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
+ bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(), scratch_offset, offset + i * 4, false);
}
bld.insert(vec);
} else {
- bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
+ bld.mubuf(opcode, def, scratch_rsrc, Operand(), scratch_offset, offset, false);
}
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
uint32_t spill_slot = sgpr_slot[spill_id];
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 8d69952c811..293ec32a330 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -99,8 +99,8 @@ void validate(Program* program, FILE * output)
bool flat = instr->format == Format::FLAT || instr->format == Format::SCRATCH || instr->format == Format::GLOBAL;
bool can_be_undef = is_phi(instr) || instr->format == Format::EXP ||
instr->format == Format::PSEUDO_REDUCTION ||
- (flat && i == 1) || (instr->format == Format::MIMG && i == 2) ||
- ((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 0);
+ (flat && i == 1) || (instr->format == Format::MIMG && i == 1) ||
+ ((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 1);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
}
}
@@ -229,15 +229,29 @@ void validate(Program* program, FILE * output)
break;
}
case Format::MTBUF:
- case Format::MUBUF:
- case Format::MIMG: {
+ case Format::MUBUF: {
check(instr->operands.size() > 1, "VMEM instructions must have at least one operand", instr.get());
- check(instr->operands[0].hasRegClass() && instr->operands[0].regClass().type() == RegType::vgpr,
+ check(instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr,
"VADDR must be in vgpr for VMEM instructions", instr.get());
- check(instr->operands[1].isTemp() && instr->operands[1].regClass().type() == RegType::sgpr, "VMEM resource constant must be sgpr", instr.get());
+ check(instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr, "VMEM resource constant must be sgpr", instr.get());
check(instr->operands.size() < 4 || (instr->operands[3].isTemp() && instr->operands[3].regClass().type() == RegType::vgpr), "VMEM write data must be vgpr", instr.get());
break;
}
+ case Format::MIMG: {
+ check(instr->operands.size() == 3, "MIMG instructions must have exactly 3 operands", instr.get());
+ check(instr->operands[0].hasRegClass() && (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
+ "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
+ if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr)
+ check(instr->operands[1].regClass() == s4, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
+ else if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr)
+ check(instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass(),
+ "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr.get());
+ check(instr->operands[2].hasRegClass() && instr->operands[2].regClass().type() == RegType::vgpr,
+ "MIMG operands[2] (VADDR) must be VGPR", instr.get());
+ check(instr->definitions.empty() || (instr->definitions[0].isTemp() && instr->definitions[0].regClass().type() == RegType::vgpr),
+ "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
+ break;
+ }
case Format::DS: {
for (const Operand& op : instr->operands) {
check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,