summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeonsi/Makefile.sources5
-rw-r--r--src/gallium/drivers/radeonsi/meson.build5
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c31
-rw-r--r--src/gallium/drivers/radeonsi/si_debug_options.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_get.c27
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c1384
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h9
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_internal.h104
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_llvm.c239
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_llvm_build.c219
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c834
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c1852
-rw-r--r--src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c1165
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c57
-rw-r--r--src/util/00-mesa-defaults.conf6
16 files changed, 598 insertions, 5346 deletions
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 886aaf6fa34..5d658b744d0 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -35,10 +35,9 @@ C_SOURCES := \
si_shader.c \
si_shader.h \
si_shader_internal.h \
+ si_shader_llvm.c \
+ si_shader_llvm_build.c \
si_shader_nir.c \
- si_shader_tgsi_alu.c \
- si_shader_tgsi_mem.c \
- si_shader_tgsi_setup.c \
si_shaderlib_tgsi.c \
si_state.c \
si_state_binning.c \
diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index d2d3dd684b0..a0bd10f6ac9 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -50,10 +50,9 @@ files_libradeonsi = files(
'si_shader.c',
'si_shader.h',
'si_shader_internal.h',
+ 'si_shader_llvm.c',
+ 'si_shader_llvm_build.c',
'si_shader_nir.c',
- 'si_shader_tgsi_alu.c',
- 'si_shader_tgsi_mem.c',
- 'si_shader_tgsi_setup.c',
'si_shaderlib_tgsi.c',
'si_state.c',
'si_state.h',
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 7abea1927cd..f264b880d29 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -24,7 +24,6 @@
*/
#include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
#include "util/u_async_debug.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
@@ -124,13 +123,8 @@ static void si_create_compute_state_async(void *job, int thread_index)
if (!compiler->passes)
si_init_compiler(sscreen, compiler);
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- tgsi_scan_shader(sel->tokens, &sel->info);
- } else {
- assert(program->ir_type == PIPE_SHADER_IR_NIR);
-
- si_nir_scan_shader(sel->nir, &sel->info);
- }
+ assert(program->ir_type == PIPE_SHADER_IR_NIR);
+ si_nir_scan_shader(sel->nir, &sel->info);
/* Store the declared LDS size into tgsi_shader_info for the shader
* cache to include it.
@@ -167,9 +161,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
program->shader.compilation_failed = true;
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI)
- FREE(sel->tokens);
return;
}
@@ -209,8 +200,6 @@ static void si_create_compute_state_async(void *job, int thread_index)
simple_mtx_unlock(&sscreen->shader_cache_mutex);
}
- FREE(sel->tokens);
- sel->tokens = NULL;
ralloc_free(sel->nir);
sel->nir = NULL;
}
@@ -234,16 +223,9 @@ static void *si_create_compute_state(
program->input_size = cso->req_input_mem;
if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
- if (sscreen->options.enable_nir &&
- cso->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
program->ir_type = PIPE_SHADER_IR_NIR;
sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
- } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
- sel->tokens = tgsi_dup_tokens(cso->prog);
- if (!sel->tokens) {
- FREE(program);
- return NULL;
- }
} else {
assert(cso->ir_type == PIPE_SHADER_IR_NIR);
sel->nir = (struct nir_shader *) cso->prog;
@@ -719,8 +701,8 @@ static bool si_upload_compute_input(struct si_context *sctx,
return true;
}
-static void si_setup_tgsi_user_data(struct si_context *sctx,
- const struct pipe_grid_info *info)
+static void si_setup_nir_user_data(struct si_context *sctx,
+ const struct pipe_grid_info *info)
{
struct si_compute *program = sctx->cs_shader_state.program;
struct si_shader_selector *sel = &program->sel;
@@ -944,7 +926,7 @@ static void si_launch_grid(
}
if (program->ir_type != PIPE_SHADER_IR_NATIVE)
- si_setup_tgsi_user_data(sctx, info);
+ si_setup_nir_user_data(sctx, info);
si_emit_dispatch_packets(sctx, info);
@@ -977,7 +959,6 @@ void si_destroy_compute(struct si_compute *program)
FREE(program->global_buffers);
si_shader_destroy(&program->shader);
- FREE(program->sel.tokens);
ralloc_free(program->sel.nir);
FREE(program);
}
diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index 7ba835acf84..9a0dd0c9f78 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -1,5 +1,4 @@
OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear")
-OPT_BOOL(enable_nir, true, "Enable NIR")
OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context")
OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)")
OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps")
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index c34c8649bcf..1adbafda53a 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -159,6 +159,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_IMAGE_LOAD_FORMATTED:
case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA:
case PIPE_CAP_TGSI_DIV:
+ case PIPE_CAP_PACKED_UNIFORMS:
+ case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
+ case PIPE_CAP_GL_SPIRV:
return 1;
case PIPE_CAP_QUERY_SO_OVERFLOW:
@@ -195,7 +198,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
if (!sscreen->info.has_indirect_compute_dispatch)
return 420;
- return sscreen->options.enable_nir ? 460 : 450;
+ return 460;
case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
/* Optimal number for good TexSubImage performance on Polaris10. */
@@ -214,15 +217,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return sscreen->info.has_sparse_vm_mappings ?
RADEON_SPARSE_PAGE_SIZE : 0;
- case PIPE_CAP_PACKED_UNIFORMS:
- case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
- case PIPE_CAP_GL_SPIRV:
- return sscreen->options.enable_nir;
-
- case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
- if (sscreen->options.enable_nir)
- return 0;
- return 1;
/* Unsupported features. */
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
@@ -246,6 +240,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
+ case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
return 0;
case PIPE_CAP_FENCE_SIGNAL:
@@ -395,14 +390,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
int ir = 1 << PIPE_SHADER_IR_NATIVE;
if (sscreen->info.has_indirect_compute_dispatch)
- ir |= 1 << PIPE_SHADER_IR_TGSI;
+ ir |= 1 << PIPE_SHADER_IR_NIR;
return ir;
}
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
uint64_t max_const_buffer_size;
- pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
+ pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_const_buffer_size);
return MIN2(max_const_buffer_size, INT_MAX);
@@ -444,13 +439,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return SI_NUM_IMAGES;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- if (sscreen->options.enable_nir)
- return 0;
- return 32;
+ return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
- if (sscreen->options.enable_nir)
- return PIPE_SHADER_IR_NIR;
- return PIPE_SHADER_IR_TGSI;
+ return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
return 4;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 2e3232d1cf0..755c768fb0b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -660,7 +660,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
}
uint64_t max_threads_per_block;
- screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+ screen->get_compute_param(screen, PIPE_SHADER_IR_NIR,
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
&max_threads_per_block);
@@ -910,10 +910,6 @@ static void si_disk_cache_create(struct si_screen *sscreen)
/* These flags affect shader compilation. */
#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
- /* Reserve left-most bit for tgsi/nir selector */
- assert(!(shader_debug_flags & (1u << 31)));
- shader_debug_flags |= (uint32_t)
- ((sscreen->options.enable_nir & 0x1) << 31);
/* Add the high bits of 32-bit addresses, which affects
* how 32-bit addresses are expanded to 64 bits.
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index e6678e026cd..65a070b4570 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -25,14 +25,9 @@
#include <llvm/Config/llvm-config.h>
#include "util/u_memory.h"
-#include "util/u_string.h"
-#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"
-#include "ac_binary.h"
#include "ac_exp_param.h"
#include "ac_shader_util.h"
#include "ac_rtld.h"
@@ -50,15 +45,7 @@ static const char scratch_rsrc_dword0_symbol[] =
static const char scratch_rsrc_dword1_symbol[] =
"SCRATCH_RSRC_DWORD1";
-static void si_init_shader_ctx(struct si_shader_context *ctx,
- struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- unsigned wave_size,
- bool nir);
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data);
+static void si_llvm_emit_barrier(struct si_shader_context *ctx);
static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
@@ -596,15 +583,6 @@ void si_llvm_load_input_vs(
out[i] = ac_to_float(&ctx->ac, fetches[i]);
}
-static void declare_input_vs(
- struct si_shader_context *ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl,
- LLVMValueRef out[4])
-{
- si_llvm_load_input_vs(ctx, input_index, out);
-}
-
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
unsigned swizzle)
{
@@ -626,53 +604,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
}
}
-/**
- * Return the value of tgsi_ind_register for indexing.
- * This is the indirect index with the constant offset added to it.
- */
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- unsigned addr_mul,
- int rel_index)
-{
- LLVMValueRef result;
-
- if (ind->File == TGSI_FILE_ADDRESS) {
- result = ctx->addrs[ind->Index][ind->Swizzle];
- result = LLVMBuildLoad(ctx->ac.builder, result, "");
- } else {
- struct tgsi_full_src_register src = {};
-
- src.Register.File = ind->File;
- src.Register.Index = ind->Index;
-
- /* Set the second index to 0 for constants. */
- if (ind->File == TGSI_FILE_CONSTANT)
- src.Register.Dimension = 1;
-
- result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src,
- TGSI_TYPE_SIGNED,
- ind->Swizzle);
- result = ac_to_integer(&ctx->ac, result);
- }
-
- return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 0),
- LLVMConstInt(ctx->i32, rel_index, 0));
-}
-
-/**
- * Like si_get_indirect_index, but restricts the return value to a (possibly
- * undefined) value inside [0..num).
- */
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- int rel_index, unsigned num)
-{
- LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index);
-
- return si_llvm_bound_index(ctx, result, num);
-}
-
static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
LLVMValueRef vertex_dw_stride,
LLVMValueRef base_addr,
@@ -701,78 +632,6 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
LLVMConstInt(ctx->i32, param * 4, 0), "");
}
-/**
- * Calculate a dword address given an input or output register and a stride.
- */
-static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
- const struct tgsi_full_dst_register *dst,
- const struct tgsi_full_src_register *src,
- LLVMValueRef vertex_dw_stride,
- LLVMValueRef base_addr)
-{
- struct tgsi_shader_info *info = &ctx->shader->selector->info;
- ubyte *name, *index, *array_first;
- int input_index;
- struct tgsi_full_dst_register reg;
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef ind_index = NULL;
-
- /* Set the register description. The address computation is the same
- * for sources and destinations. */
- if (src) {
- reg.Register.File = src->Register.File;
- reg.Register.Index = src->Register.Index;
- reg.Register.Indirect = src->Register.Indirect;
- reg.Register.Dimension = src->Register.Dimension;
- reg.Indirect = src->Indirect;
- reg.Dimension = src->Dimension;
- reg.DimIndirect = src->DimIndirect;
- } else
- reg = *dst;
-
- /* If the register is 2-dimensional (e.g. an array of vertices
- * in a primitive), calculate the base address of the vertex. */
- if (reg.Register.Dimension) {
- if (reg.Dimension.Indirect)
- vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
- 1, reg.Dimension.Index);
- else
- vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
- }
-
- /* Get information about the register. */
- if (reg.Register.File == TGSI_FILE_INPUT) {
- name = info->input_semantic_name;
- index = info->input_semantic_index;
- array_first = info->input_array_first;
- } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
- name = info->output_semantic_name;
- index = info->output_semantic_index;
- array_first = info->output_array_first;
- } else {
- assert(0);
- return NULL;
- }
-
- if (reg.Register.Indirect) {
- /* Add the relative address of the element. */
- if (reg.Indirect.ArrayID)
- input_index = array_first[reg.Indirect.ArrayID];
- else
- input_index = reg.Register.Index;
-
- ind_index = si_get_indirect_index(ctx, &reg.Indirect,
- 1, reg.Register.Index - input_index);
- } else {
- input_index = reg.Register.Index;
- }
-
- return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
- base_addr, vertex_index,
- ind_index, name[input_index],
- index[input_index]);
-}
-
/* The offchip buffer layout for TCS->TES is
*
* - attribute 0 of patch 0 vertex 0
@@ -854,65 +713,24 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
vertex_index, param_index);
}
-static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
- struct si_shader_context *ctx,
- const struct tgsi_full_dst_register *dst,
- const struct tgsi_full_src_register *src)
+static LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
+ LLVMTypeRef type,
+ LLVMValueRef val1,
+ LLVMValueRef val2)
{
- struct tgsi_shader_info *info = &ctx->shader->selector->info;
- ubyte *name, *index, *array_first;
- struct tgsi_full_src_register reg;
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef param_index = NULL;
- unsigned param_base;
-
- reg = src ? *src : tgsi_full_src_register_from_dst(dst);
-
- if (reg.Register.Dimension) {
- if (reg.Dimension.Indirect)
- vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect,
- 1, reg.Dimension.Index);
- else
- vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
- }
-
- /* Get information about the register. */
- if (reg.Register.File == TGSI_FILE_INPUT) {
- name = info->input_semantic_name;
- index = info->input_semantic_index;
- array_first = info->input_array_first;
- } else if (reg.Register.File == TGSI_FILE_OUTPUT) {
- name = info->output_semantic_name;
- index = info->output_semantic_index;
- array_first = info->output_array_first;
- } else {
- assert(0);
- return NULL;
- }
-
- if (reg.Register.Indirect) {
- if (reg.Indirect.ArrayID)
- param_base = array_first[reg.Indirect.ArrayID];
- else
- param_base = reg.Register.Index;
-
- param_index = si_get_indirect_index(ctx, &reg.Indirect,
- 1, reg.Register.Index - param_base);
- } else {
- param_base = reg.Register.Index;
- }
-
- return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index, name[param_base],
- index[param_base]);
+ LLVMValueRef values[2] = {
+ ac_to_integer(&ctx->ac, val1),
+ ac_to_integer(&ctx->ac, val2),
+ };
+ LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
+ return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
}
-static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef buffer_load(struct si_shader_context *ctx,
LLVMTypeRef type, unsigned swizzle,
LLVMValueRef buffer, LLVMValueRef offset,
LLVMValueRef base, bool can_speculate)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef value, value2;
LLVMTypeRef vec_type = LLVMVectorType(type, 4);
@@ -938,7 +756,7 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
swizzle * 4 + 4, ac_glc, can_speculate, false);
- return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+ return si_build_gather_64bit(ctx, type, value, value2);
}
/**
@@ -948,30 +766,28 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
* \param dw_addr address in dwords
*/
-static LLVMValueRef lshs_lds_load(struct lp_build_tgsi_context *bld_base,
- LLVMTypeRef type, unsigned swizzle,
- LLVMValueRef dw_addr)
+static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx,
+ LLVMTypeRef type, unsigned swizzle,
+ LLVMValueRef dw_addr)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef value;
if (swizzle == ~0) {
- LLVMValueRef values[TGSI_NUM_CHANNELS];
+ LLVMValueRef values[4];
- for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
- values[chan] = lshs_lds_load(bld_base, type, chan, dw_addr);
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
- return ac_build_gather_values(&ctx->ac, values,
- TGSI_NUM_CHANNELS);
+ return ac_build_gather_values(&ctx->ac, values, 4);
}
/* Split 64-bit loads. */
if (llvm_type_is_64bit(ctx, type)) {
LLVMValueRef lo, hi;
- lo = lshs_lds_load(bld_base, ctx->i32, swizzle, dw_addr);
- hi = lshs_lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr);
- return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+ lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr);
+ hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr);
+ return si_build_gather_64bit(ctx, type, lo, hi);
}
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
@@ -1049,21 +865,6 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
return ac_build_gather_values(&ctx->ac, desc, 4);
}
-static LLVMValueRef fetch_input_tcs(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef dw_addr, stride;
- unsigned swizzle = swizzle_in & 0xffff;
- stride = get_tcs_in_vertex_dw_stride(ctx);
- dw_addr = get_tcs_in_current_patch_offset(ctx);
- dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
-
- return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
LLVMTypeRef type,
LLVMValueRef vertex_index,
@@ -1079,7 +880,6 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct tgsi_shader_info *info = &ctx->shader->selector->info;
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMValueRef dw_addr, stride;
ubyte name, index;
@@ -1125,49 +925,12 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
offset *= 2;
offset += component;
- value[i + component] = lshs_lds_load(bld_base, type, offset, dw_addr);
+ value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
}
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
-static LLVMValueRef fetch_output_tcs(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef dw_addr, stride;
- unsigned swizzle = (swizzle_in & 0xffff);
-
- if (reg->Register.Dimension) {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
- } else {
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
- }
-
- return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
-}
-
-static LLVMValueRef fetch_input_tes(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type, unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef base, addr;
- unsigned swizzle = (swizzle_in & 0xffff);
-
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
- addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
-
- return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
- ctx->tess_offchip_ring, base, addr, true);
-}
-
LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
LLVMTypeRef type,
LLVMValueRef vertex_index,
@@ -1226,110 +989,13 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
}
offset += component;
- value[i + component] = buffer_load(&ctx->bld_base, type, offset,
+ value[i + component] = buffer_load(ctx, type, offset,
ctx->tess_offchip_ring, base, addr, true);
}
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
-static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_instruction *inst,
- const struct tgsi_opcode_info *info,
- unsigned index,
- LLVMValueRef dst[4])
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_dst_register *reg = &inst->Dst[index];
- const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
- unsigned chan_index;
- LLVMValueRef dw_addr, stride;
- LLVMValueRef buffer, base, buf_addr;
- LLVMValueRef values[4];
- bool skip_lds_store;
- bool is_tess_factor = false, is_tess_inner = false;
-
- /* Only handle per-patch and per-vertex outputs here.
- * Vectors will be lowered to scalars and this function will be called again.
- */
- if (reg->Register.File != TGSI_FILE_OUTPUT ||
- (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
- si_llvm_emit_store(bld_base, inst, info, index, dst);
- return;
- }
-
- if (reg->Register.Dimension) {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
- skip_lds_store = !sh_info->reads_pervertex_outputs;
- } else {
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr);
- skip_lds_store = !sh_info->reads_perpatch_outputs;
-
- if (!reg->Register.Indirect) {
- int name = sh_info->output_semantic_name[reg->Register.Index];
-
- /* Always write tess factors into LDS for the TCS epilog. */
- if (name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) {
- /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
- skip_lds_store = !sh_info->reads_tessfactor_outputs &&
- ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
- is_tess_factor = true;
- is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
- }
- }
- }
-
- buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
-
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
- buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
-
- uint32_t writemask = reg->Register.WriteMask;
- while (writemask) {
- chan_index = u_bit_scan(&writemask);
- LLVMValueRef value = dst[chan_index];
-
- if (inst->Instruction.Saturate)
- value = ac_build_clamp(&ctx->ac, value);
-
- /* Skip LDS stores if there is no LDS read of this output. */
- if (!skip_lds_store)
- lshs_lds_store(ctx, chan_index, dw_addr, value);
-
- value = ac_to_integer(&ctx->ac, value);
- values[chan_index] = value;
-
- if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
- buf_addr, base,
- 4 * chan_index, ac_glc);
- }
-
- /* Write tess factors into VGPRs for the epilog. */
- if (is_tess_factor &&
- ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
- if (!is_tess_inner) {
- LLVMBuildStore(ctx->ac.builder, value, /* outer */
- ctx->invoc0_tess_factors[chan_index]);
- } else if (chan_index < 2) {
- LLVMBuildStore(ctx->ac.builder, value, /* inner */
- ctx->invoc0_tess_factors[4 + chan_index]);
- }
- }
- }
-
- if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
- LLVMValueRef value = ac_build_gather_values(&ctx->ac,
- values, 4);
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
- base, 0, ac_glc);
- }
-}
-
static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
const struct nir_variable *var,
LLVMValueRef vertex_index,
@@ -1452,14 +1118,13 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
}
}
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
- unsigned input_index,
- unsigned vtx_offset_param,
- LLVMTypeRef type,
- unsigned swizzle)
+static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
+ unsigned input_index,
+ unsigned vtx_offset_param,
+ LLVMTypeRef type,
+ unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
struct si_shader *shader = ctx->shader;
LLVMValueRef vtx_offset, soffset;
struct tgsi_shader_info *info = &shader->selector->info;
@@ -1512,14 +1177,13 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
/* GFX6: input load from the ESGS ring in memory. */
if (swizzle == ~0) {
- LLVMValueRef values[TGSI_NUM_CHANNELS];
+ LLVMValueRef values[4];
unsigned chan;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ for (chan = 0; chan < 4; chan++) {
values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
type, chan);
}
- return ac_build_gather_values(&ctx->ac, values,
- TGSI_NUM_CHANNELS);
+ return ac_build_gather_values(&ctx->ac, values, 4);
}
/* Get the vertex offset parameter on GFX6. */
@@ -1540,7 +1204,7 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
ctx->i32_0, vtx_offset, soffset,
0, ac_glc, true, false);
- return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+ return si_build_gather_64bit(ctx, type, value, value2);
}
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
}
@@ -1570,58 +1234,6 @@ static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
-static LLVMValueRef fetch_input_gs(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct tgsi_shader_info *info = &ctx->shader->selector->info;
- unsigned swizzle = swizzle_in & 0xffff;
-
- unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
- if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
- return si_get_primitive_id(ctx, swizzle);
-
- if (!reg->Register.Dimension)
- return NULL;
-
- return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index,
- reg->Dimension.Index,
- tgsi2llvmtype(bld_base, type),
- swizzle);
-}
-
-static int lookup_interp_param_index(unsigned interpolate, unsigned location)
-{
- switch (interpolate) {
- case TGSI_INTERPOLATE_CONSTANT:
- return 0;
-
- case TGSI_INTERPOLATE_LINEAR:
- if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
- return SI_PARAM_LINEAR_SAMPLE;
- else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
- return SI_PARAM_LINEAR_CENTROID;
- else
- return SI_PARAM_LINEAR_CENTER;
- break;
- case TGSI_INTERPOLATE_COLOR:
- case TGSI_INTERPOLATE_PERSPECTIVE:
- if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
- return SI_PARAM_PERSP_SAMPLE;
- else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
- return SI_PARAM_PERSP_CENTROID;
- else
- return SI_PARAM_PERSP_CENTER;
- break;
- default:
- fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
- return -1;
- }
-}
-
static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
unsigned attr_index, unsigned chan,
LLVMValueRef prim_mask,
@@ -1654,9 +1266,8 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx,
* @param face SI_PARAM_FRONT_FACE
* @param result the return value (4 components)
*/
-static void interp_fs_input(struct si_shader_context *ctx,
+static void interp_fs_color(struct si_shader_context *ctx,
unsigned input_index,
- unsigned semantic_name,
unsigned semantic_index,
unsigned num_interp_inputs,
unsigned colors_read_mask,
@@ -1693,8 +1304,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
ctx->i32_1, "");
}
- if (semantic_name == TGSI_SEMANTIC_COLOR &&
- ctx->shader->key.part.ps.prolog.color_two_side) {
+ if (ctx->shader->key.part.ps.prolog.color_two_side) {
LLVMValueRef is_face_positive;
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
@@ -1707,7 +1317,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
face, ctx->i32_0, "");
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ for (chan = 0; chan < 4; chan++) {
LLVMValueRef front, back;
front = si_build_fs_interp(ctx,
@@ -1723,14 +1333,8 @@ static void interp_fs_input(struct si_shader_context *ctx,
back,
"");
}
- } else if (semantic_name == TGSI_SEMANTIC_FOG) {
- result[0] = si_build_fs_interp(ctx, input_index,
- 0, prim_mask, i, j);
- result[1] =
- result[2] = LLVMConstReal(ctx->f32, 0.0f);
- result[3] = LLVMConstReal(ctx->f32, 1.0f);
} else {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ for (chan = 0; chan < 4; chan++) {
result[chan] = si_build_fs_interp(ctx,
input_index, chan,
prim_mask, i, j);
@@ -1738,60 +1342,6 @@ static void interp_fs_input(struct si_shader_context *ctx,
}
}
-void si_llvm_load_input_fs(
- struct si_shader_context *ctx,
- unsigned input_index,
- LLVMValueRef out[4])
-{
- struct si_shader *shader = ctx->shader;
- struct tgsi_shader_info *info = &shader->selector->info;
- LLVMValueRef main_fn = ctx->main_fn;
- LLVMValueRef interp_param = NULL;
- int interp_param_idx;
- enum tgsi_semantic semantic_name = info->input_semantic_name[input_index];
- unsigned semantic_index = info->input_semantic_index[input_index];
- enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index];
- enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index];
-
- /* Get colors from input VGPRs (set by the prolog). */
- if (semantic_name == TGSI_SEMANTIC_COLOR) {
- unsigned colors_read = shader->selector->info.colors_read;
- unsigned mask = colors_read >> (semantic_index * 4);
- unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
- (semantic_index ? util_bitcount(colors_read & 0xf) : 0);
- LLVMValueRef undef = LLVMGetUndef(ctx->f32);
-
- out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
- out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
- out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
- out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
- return;
- }
-
- interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc);
- if (interp_param_idx == -1)
- return;
- else if (interp_param_idx) {
- interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
- }
-
- interp_fs_input(ctx, input_index, semantic_name,
- semantic_index, 0, /* this param is unused */
- shader->selector->info.colors_read, interp_param,
- ac_get_arg(&ctx->ac, ctx->args.prim_mask),
- LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
- &out[0]);
-}
-
-static void declare_input_fs(
- struct si_shader_context *ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl,
- LLVMValueRef out[4])
-{
- si_llvm_load_input_fs(ctx, input_index, out);
-}
-
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
{
return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
@@ -1913,7 +1463,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
LLVMConstInt(ctx->i32, param, 0));
- return buffer_load(&ctx->bld_base, ctx->f32,
+ return buffer_load(ctx, ctx->f32,
~0, ctx->tess_offchip_ring, base, addr, true);
}
@@ -1982,211 +1532,6 @@ static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
}
-void si_load_system_value(struct si_shader_context *ctx,
- unsigned index,
- const struct tgsi_full_declaration *decl)
-{
- LLVMValueRef value = 0;
-
- assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
-
- switch (decl->Semantic.Name) {
- case TGSI_SEMANTIC_INSTANCEID:
- value = ctx->abi.instance_id;
- break;
-
- case TGSI_SEMANTIC_VERTEXID:
- value = LLVMBuildAdd(ctx->ac.builder,
- ctx->abi.vertex_id,
- ac_get_arg(&ctx->ac, ctx->args.base_vertex), "");
- break;
-
- case TGSI_SEMANTIC_VERTEXID_NOBASE:
- /* Unused. Clarify the meaning in indexed vs. non-indexed
- * draws if this is ever used again. */
- assert(false);
- break;
-
- case TGSI_SEMANTIC_BASEVERTEX:
- value = get_base_vertex(&ctx->abi);
- break;
-
- case TGSI_SEMANTIC_BASEINSTANCE:
- value = ac_get_arg(&ctx->ac, ctx->args.start_instance);
- break;
-
- case TGSI_SEMANTIC_DRAWID:
- value = ac_get_arg(&ctx->ac, ctx->args.draw_id);
- break;
-
- case TGSI_SEMANTIC_INVOCATIONID:
- if (ctx->type == PIPE_SHADER_TESS_CTRL) {
- value = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
- } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
- if (ctx->screen->info.chip_class >= GFX10) {
- value = LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
- LLVMConstInt(ctx->i32, 127, 0), "");
- } else {
- value = ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id);
- }
- } else {
- assert(!"INVOCATIONID not implemented");
- }
- break;
-
- case TGSI_SEMANTIC_POSITION:
- {
- LLVMValueRef pos[4] = {
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
- ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
- };
- value = ac_build_gather_values(&ctx->ac, pos, 4);
- break;
- }
-
- case TGSI_SEMANTIC_FACE:
- value = ac_get_arg(&ctx->ac, ctx->args.front_face);
- break;
-
- case TGSI_SEMANTIC_SAMPLEID:
- value = si_get_sample_id(ctx);
- break;
-
- case TGSI_SEMANTIC_SAMPLEPOS: {
- LLVMValueRef pos[4] = {
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
- LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
- LLVMConstReal(ctx->f32, 0),
- LLVMConstReal(ctx->f32, 0)
- };
- pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
- pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
- value = ac_build_gather_values(&ctx->ac, pos, 4);
- break;
- }
-
- case TGSI_SEMANTIC_SAMPLEMASK:
- /* This can only occur with the OpenGL Core profile, which
- * doesn't support smoothing.
- */
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE);
- break;
-
- case TGSI_SEMANTIC_TESSCOORD:
- value = si_load_tess_coord(&ctx->abi);
- break;
-
- case TGSI_SEMANTIC_VERTICESIN:
- value = si_load_patch_vertices_in(&ctx->abi);
- break;
-
- case TGSI_SEMANTIC_TESSINNER:
- case TGSI_SEMANTIC_TESSOUTER:
- value = load_tess_level(ctx, decl->Semantic.Name);
- break;
-
- case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
- case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
- value = load_tess_level_default(ctx, decl->Semantic.Name);
- break;
-
- case TGSI_SEMANTIC_PRIMID:
- value = si_get_primitive_id(ctx, 0);
- break;
-
- case TGSI_SEMANTIC_GRID_SIZE:
- value = ac_get_arg(&ctx->ac, ctx->args.num_work_groups);
- break;
-
- case TGSI_SEMANTIC_BLOCK_SIZE:
- value = get_block_size(&ctx->abi);
- break;
-
- case TGSI_SEMANTIC_BLOCK_ID:
- {
- LLVMValueRef values[3];
-
- for (int i = 0; i < 3; i++) {
- values[i] = ctx->i32_0;
- if (ctx->args.workgroup_ids[i].used) {
- values[i] = ac_get_arg(&ctx->ac, ctx->args.workgroup_ids[i]);
- }
- }
- value = ac_build_gather_values(&ctx->ac, values, 3);
- break;
- }
-
- case TGSI_SEMANTIC_THREAD_ID:
- value = ac_get_arg(&ctx->ac, ctx->args.local_invocation_ids);
- break;
-
- case TGSI_SEMANTIC_HELPER_INVOCATION:
- value = ac_build_load_helper_invocation(&ctx->ac);
- break;
-
- case TGSI_SEMANTIC_SUBGROUP_SIZE:
- value = LLVMConstInt(ctx->i32, ctx->ac.wave_size, 0);
- break;
-
- case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
- value = ac_get_thread_id(&ctx->ac);
- break;
-
- case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
- {
- LLVMValueRef id = ac_get_thread_id(&ctx->ac);
- if (ctx->ac.wave_size == 64)
- id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
- value = LLVMBuildShl(ctx->ac.builder,
- LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
- if (ctx->ac.wave_size == 32)
- value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
- value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
- break;
- }
-
- case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
- case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
- case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
- case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
- {
- LLVMValueRef id = ac_get_thread_id(&ctx->ac);
- if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
- decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
- /* All bits set except LSB */
- value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
- } else {
- /* All bits set */
- value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
- }
- if (ctx->ac.wave_size == 64)
- id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
- value = LLVMBuildShl(ctx->ac.builder, value, id, "");
- if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
- decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
- value = LLVMBuildNot(ctx->ac.builder, value, "");
- if (ctx->ac.wave_size == 32)
- value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
- value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
- break;
- }
-
- case TGSI_SEMANTIC_CS_USER_DATA_AMD:
- value = ac_get_arg(&ctx->ac, ctx->cs_user_data);
- break;
-
- default:
- assert(!"unknown system value");
- return;
- }
-
- ctx->system_values[index] = value;
-}
-
void si_declare_compute_memory(struct si_shader_context *ctx)
{
struct si_shader_selector *sel = ctx->shader->selector;
@@ -2206,15 +1551,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx)
ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
- const struct tgsi_full_declaration *decl)
-{
- assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
- assert(decl->Range.First == decl->Range.Last);
-
- si_declare_compute_memory(ctx);
-}
-
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
{
LLVMValueRef ptr =
@@ -2256,15 +1592,6 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
}
-static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
-{
- LLVMValueRef list_ptr = ac_get_arg(&ctx->ac,
- ctx->const_and_shader_buffers);
-
- return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
- LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
-}
-
static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@@ -2299,72 +1626,6 @@ load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
}
-static LLVMValueRef fetch_constant(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct si_shader_selector *sel = ctx->shader->selector;
- const struct tgsi_ind_register *ireg = &reg->Indirect;
- unsigned buf, idx;
- unsigned swizzle = swizzle_in & 0xffff;
-
- LLVMValueRef addr, bufp;
-
- if (swizzle_in == LP_CHAN_ALL) {
- unsigned chan;
- LLVMValueRef values[4];
- for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
- values[chan] = fetch_constant(bld_base, reg, type, chan);
-
- return ac_build_gather_values(&ctx->ac, values, 4);
- }
-
- /* Split 64-bit loads. */
- if (tgsi_type_is_64bit(type)) {
- LLVMValueRef lo, hi;
-
- lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
- hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16));
- return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- lo, hi);
- }
-
- idx = reg->Register.Index * 4 + swizzle;
- if (reg->Register.Indirect) {
- addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
- } else {
- addr = LLVMConstInt(ctx->i32, idx * 4, 0);
- }
-
- /* Fast path when user data SGPRs point to constant buffer 0 directly. */
- if (sel->info.const_buffers_declared == 1 &&
- sel->info.shader_buffers_declared == 0) {
- LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
- LLVMValueRef result = buffer_load_const(ctx, desc, addr);
- return bitcast(bld_base, type, result);
- }
-
- assert(reg->Register.Dimension);
- buf = reg->Dimension.Index;
-
- if (reg->Dimension.Indirect) {
- LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
- LLVMValueRef index;
- index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
- reg->Dimension.Index,
- ctx->num_const_buffers);
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
- bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
- } else
- bufp = load_const_buffer_desc(ctx, buf);
-
- return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
-}
-
/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct si_shader_context *ctx,
LLVMValueRef *values,
@@ -2495,11 +1756,8 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
}
}
-static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef alpha)
+static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) {
static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
[PIPE_FUNC_LESS] = LLVMRealOLT,
@@ -2522,11 +1780,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
}
}
-static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx,
LLVMValueRef alpha,
unsigned samplemask_param)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef coverage;
/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
@@ -2569,8 +1826,8 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
/* Compute dot products of position and user clip plane vectors */
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
+ for (chan = 0; chan < 4; chan++) {
+ for (const_chan = 0; const_chan < 4; const_chan++) {
LLVMValueRef addr =
LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 +
const_chan) * 4, 0);
@@ -3030,9 +2287,8 @@ void si_llvm_export_vs(struct si_shader_context *ctx,
* Forward all outputs from the vertex shader to the TES. This is only used
* for the fixed function TCS.
*/
-static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
+static void si_copy_tcs_inputs(struct si_shader_context *ctx)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef invocation_id, buffer, buffer_offset;
LLVMValueRef lds_vertex_stride, lds_base;
uint64_t inputs;
@@ -3059,21 +2315,20 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
invocation_id,
LLVMConstInt(ctx->i32, i, 0));
- LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
+ LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
buffer_offset, 0, ac_glc);
}
}
-static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
+static void si_write_tess_factors(struct si_shader_context *ctx,
LLVMValueRef rel_patch_id,
LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
LLVMValueRef invoc0_tf_outer[4],
LLVMValueRef invoc0_tf_inner[2])
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
unsigned tess_inner_index, tess_outer_index;
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
@@ -3082,7 +2337,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
/* Add a barrier before loading tess factors from LDS. */
if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
- si_llvm_emit_barrier(NULL, bld_base, NULL);
+ si_llvm_emit_barrier(ctx);
/* Do this only for invocation 0, because the tess levels are per-patch,
* not per-vertex.
@@ -3144,11 +2399,11 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
- lshs_lds_load(bld_base, ctx->ac.i32, i, lds_outer);
+ lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
}
for (i = 0; i < inner_comps; i++) {
inner[i] = out[outer_comps+i] =
- lshs_lds_load(bld_base, ctx->ac.i32, i, lds_inner);
+ lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
}
}
@@ -3279,11 +2534,10 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
LLVMValueRef *addrs)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
- si_copy_tcs_inputs(bld_base);
+ si_copy_tcs_inputs(ctx);
rel_patch_id = get_rel_patch_id(ctx);
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
@@ -3595,12 +2849,6 @@ static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
emit_gs_epilogue(ctx);
}
-static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_gs_epilogue(ctx);
-}
-
static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs)
@@ -3677,24 +2925,15 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi,
ctx->return_value = ret;
}
-static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS,
- &ctx->outputs[0][0]);
-}
-
struct si_ps_exports {
unsigned num;
struct ac_export_args args[10];
};
-static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_z(struct si_shader_context *ctx,
LLVMValueRef depth, LLVMValueRef stencil,
LLVMValueRef samplemask, struct si_ps_exports *exp)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
struct ac_export_args args;
ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
@@ -3702,12 +2941,11 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
memcpy(&exp->args[exp->num++], &args, sizeof(args));
}
-static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+static void si_export_mrt_color(struct si_shader_context *ctx,
LLVMValueRef *color, unsigned index,
unsigned samplemask_param,
bool is_last, struct si_ps_exports *exp)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
int i;
/* Clamp color */
@@ -3722,11 +2960,11 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
/* Alpha test */
if (index == 0 &&
ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
- si_alpha_test(bld_base, color[3]);
+ si_alpha_test(ctx, color[3]);
/* Line & polygon smoothing */
if (ctx->shader->key.part.ps.epilog.poly_line_smoothing)
- color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3],
+ color[3] = si_scale_alpha_by_sample_mask(ctx, color[3],
samplemask_param);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
@@ -3873,345 +3111,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi,
ctx->return_value = ret;
}
-static void membar_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
- unsigned flags = LLVMConstIntGetZExtValue(src0);
- unsigned wait_flags = 0;
-
- if (flags & TGSI_MEMBAR_THREAD_GROUP)
- wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
- if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
- TGSI_MEMBAR_SHADER_BUFFER |
- TGSI_MEMBAR_SHADER_IMAGE))
- wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
-
- if (flags & TGSI_MEMBAR_SHARED)
- wait_flags |= AC_WAIT_LGKM;
-
- ac_build_waitcnt(&ctx->ac, wait_flags);
-}
-
-static void clock_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef tmp = ac_build_shader_clock(&ctx->ac);
-
- emit_data->output[0] =
- LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, "");
- emit_data->output[1] =
- LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, "");
-}
-
-static void si_llvm_emit_ddxy(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned opcode = emit_data->info->opcode;
- LLVMValueRef val;
- int idx;
- unsigned mask;
-
- if (opcode == TGSI_OPCODE_DDX_FINE)
- mask = AC_TID_MASK_LEFT;
- else if (opcode == TGSI_OPCODE_DDY_FINE)
- mask = AC_TID_MASK_TOP;
- else
- mask = AC_TID_MASK_TOP_LEFT;
-
- /* for DDX we want to next X pixel, DDY next Y pixel. */
- idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
-
- val = ac_to_integer(&ctx->ac, emit_data->args[0]);
- val = ac_build_ddxy(&ctx->ac, mask, idx, val);
- emit_data->output[emit_data->chan] = val;
-}
-
-static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct si_shader *shader = ctx->shader;
- const struct tgsi_shader_info *info = &shader->selector->info;
- LLVMValueRef interp_param;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- const struct tgsi_full_src_register *input = &inst->Src[0];
- int input_base, input_array_size;
- int chan;
- int i;
- LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, ctx->args.prim_mask);
- LLVMValueRef array_idx, offset_x = NULL, offset_y = NULL;
- int interp_param_idx;
- unsigned interp;
- unsigned location;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
- /* offset is in second src, first two channels */
- offset_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
- TGSI_CHAN_X);
- offset_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1,
- TGSI_CHAN_Y);
- } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
- LLVMValueRef sample_position;
- LLVMValueRef sample_id;
- LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
-
- /* fetch sample ID, then fetch its sample position,
- * and place into first two channels.
- */
- sample_id = lp_build_emit_fetch(bld_base,
- emit_data->inst, 1, TGSI_CHAN_X);
- sample_id = ac_to_integer(&ctx->ac, sample_id);
-
- /* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading
- * Language 4.50 spec says about interpolateAtSample:
- *
- * "Returns the value of the input interpolant variable at
- * the location of sample number sample. If multisample
- * buffers are not available, the input variable will be
- * evaluated at the center of the pixel. If sample sample
- * does not exist, the position used to interpolate the
- * input variable is undefined."
- *
- * This means that sample_id values outside of the valid are
- * in fact valid input, and the usual mechanism for loading the
- * sample position doesn't work.
- */
- if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) {
- LLVMValueRef center[4] = {
- LLVMConstReal(ctx->f32, 0.5),
- LLVMConstReal(ctx->f32, 0.5),
- ctx->ac.f32_0,
- ctx->ac.f32_0,
- };
-
- sample_position = ac_build_gather_values(&ctx->ac, center, 4);
- } else {
- sample_position = load_sample_position(&ctx->abi, sample_id);
- }
-
- offset_x = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
- ctx->i32_0, "");
-
- offset_x = LLVMBuildFSub(ctx->ac.builder, offset_x, halfval, "");
- offset_y = LLVMBuildExtractElement(ctx->ac.builder, sample_position,
- ctx->i32_1, "");
- offset_y = LLVMBuildFSub(ctx->ac.builder, offset_y, halfval, "");
- }
-
- assert(input->Register.File == TGSI_FILE_INPUT);
-
- if (input->Register.Indirect) {
- unsigned array_id = input->Indirect.ArrayID;
-
- if (array_id) {
- input_base = info->input_array_first[array_id];
- input_array_size = info->input_array_last[array_id] - input_base + 1;
- } else {
- input_base = inst->Src[0].Register.Index;
- input_array_size = info->num_inputs - input_base;
- }
-
- array_idx = si_get_indirect_index(ctx, &input->Indirect,
- 1, input->Register.Index - input_base);
- } else {
- input_base = inst->Src[0].Register.Index;
- input_array_size = 1;
- array_idx = ctx->i32_0;
- }
-
- interp = shader->selector->info.input_interpolate[input_base];
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
- inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
- location = TGSI_INTERPOLATE_LOC_CENTER;
- else
- location = TGSI_INTERPOLATE_LOC_CENTROID;
-
- interp_param_idx = lookup_interp_param_index(interp, location);
- if (interp_param_idx == -1)
- return;
- else if (interp_param_idx)
- interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx);
- else
- interp_param = NULL;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
- inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
- LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
- /*
- * take the I then J parameters, and the DDX/Y for it, and
- * calculate the IJ inputs for the interpolator.
- * temp1 = ddx * offset/sample.x + I;
- * interp_param.I = ddy * offset/sample.y + temp1;
- * temp1 = ddx * offset/sample.x + J;
- * interp_param.J = ddy * offset/sample.y + temp1;
- */
- for (i = 0; i < 2; i++) {
- LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
- LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
- LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, ix_ll, "");
- LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, iy_ll, "");
- LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
- interp_param, ix_ll, "");
- LLVMValueRef temp;
-
- interp_el = ac_to_float(&ctx->ac, interp_el);
-
- temp = ac_build_fmad(&ctx->ac, ddx_el, offset_x, interp_el);
- ij_out[i] = ac_build_fmad(&ctx->ac, ddy_el, offset_y, temp);
- }
- interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
- }
-
- if (interp_param)
- interp_param = ac_to_float(&ctx->ac, interp_param);
-
- for (chan = 0; chan < 4; chan++) {
- LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size));
- unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
-
- for (unsigned idx = 0; idx < input_array_size; ++idx) {
- LLVMValueRef v, i = NULL, j = NULL;
-
- if (interp_param) {
- i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->i32_0, "");
- j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->i32_1, "");
- }
- v = si_build_fs_interp(ctx, input_base + idx, schan,
- prim_mask, i, j);
-
- gather = LLVMBuildInsertElement(ctx->ac.builder,
- gather, v, LLVMConstInt(ctx->i32, idx, false), "");
- }
-
- emit_data->output[chan] = LLVMBuildExtractElement(
- ctx->ac.builder, gather, array_idx, "");
- }
-}
-
-static void vote_all_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]);
- emit_data->output[emit_data->chan] =
- LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_any_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]);
- emit_data->output[emit_data->chan] =
- LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void vote_eq_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]);
- emit_data->output[emit_data->chan] =
- LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, "");
-}
-
-static void ballot_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
- tmp = ac_build_ballot(&ctx->ac, tmp);
-
- emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
-
- if (ctx->ac.wave_size == 32) {
- emit_data->output[1] = ctx->i32_0;
- } else {
- tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), "");
- emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
- }
-}
-
-static void read_lane_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_READ_INVOC) {
- emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
- 0, emit_data->src_chan);
-
- /* Always read the source invocation (= lane) from the X channel. */
- emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
- 1, TGSI_CHAN_X);
- emit_data->arg_count = 2;
- }
-
- /* We currently have no other way to prevent LLVM from lifting the icmp
- * calls to a dominating basic block.
- */
- ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]);
-
- for (unsigned i = 0; i < emit_data->arg_count; ++i)
- emit_data->args[i] = ac_to_integer(&ctx->ac, emit_data->args[i]);
-
- emit_data->output[emit_data->chan] =
- ac_build_intrinsic(&ctx->ac, action->intr_name,
- ctx->i32, emit_data->args, emit_data->arg_count,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-}
-
-static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
- LLVMValueRef imm;
- unsigned stream;
-
- assert(src0.File == TGSI_FILE_IMMEDIATE);
-
- imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX];
- stream = LLVMConstIntGetZExtValue(imm) & 0x3;
- return stream;
-}
-
/* Emit one vertex from the geometry shader */
static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
unsigned stream,
@@ -4296,18 +3195,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
ac_build_endif(&ctx->ac, 6505);
}
-/* Emit one vertex from the geometry shader */
-static void si_tgsi_emit_vertex(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned stream = si_llvm_get_stream(bld_base, emit_data);
-
- si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
-}
-
/* Cut one primitive from the geometry shader */
static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
unsigned stream)
@@ -4324,23 +3211,8 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
si_get_gs_wave_id(ctx));
}
-/* Cut one primitive from the geometry shader */
-static void si_tgsi_emit_primitive(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- si_llvm_emit_primitive(&ctx->abi, si_llvm_get_stream(bld_base, emit_data));
-}
-
-static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_barrier(struct si_shader_context *ctx)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
/* GFX6 only (thanks to a hw bug workaround):
* The real barrier instruction isn’t needed, because an entire patch
* always fits into a single wave.
@@ -5654,9 +4526,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
- si_init_shader_ctx(&ctx, sscreen, compiler,
- si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
- false);
+ si_llvm_context_init(&ctx, sscreen, compiler,
+ si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
+ 64);
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
@@ -5917,47 +4789,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
}
}
-static void si_init_shader_ctx(struct si_shader_context *ctx,
- struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- unsigned wave_size,
- bool nir)
-{
- struct lp_build_tgsi_context *bld_base;
-
- si_llvm_context_init(ctx, sscreen, compiler, wave_size,
- nir ? 64 : wave_size);
-
- bld_base = &ctx->bld_base;
- bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
-
- bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID].emit = build_interp_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE].emit = build_interp_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET].emit = build_interp_intrinsic;
-
- bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
-
- bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
-
- bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
- bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
- bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
- bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
-
- bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
- bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
- bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
- bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
- bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
- bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
- bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
- bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
-
- bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
- bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_tgsi_emit_primitive;
- bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
-}
-
static void si_optimize_vs_outputs(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
@@ -6014,17 +4845,34 @@ LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
}
+static void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMBuilderRef builder = ctx->ac.builder;
+
+ if (ctx->shader->selector->force_correct_derivs_after_kill) {
+ /* Kill immediately while maintaining WQM. */
+ ac_build_kill_if_false(&ctx->ac,
+ ac_build_wqm_vote(&ctx->ac, visible));
+
+ LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
+ mask = LLVMBuildAnd(builder, mask, visible, "");
+ LLVMBuildStore(builder, mask, ctx->postponed_kill);
+ return;
+ }
+
+ ac_build_kill_if_false(&ctx->ac, visible);
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
struct nir_shader *nir, bool free_nir)
{
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
// TODO clean all this up!
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
- ctx->load_input = declare_input_vs;
if (shader->key.as_ls)
ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
@@ -6035,22 +4883,16 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
ctx->abi.load_base_vertex = get_base_vertex;
break;
case PIPE_SHADER_TESS_CTRL:
- bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
ctx->abi.load_tess_level = si_load_tess_level;
- bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
- bld_base->emit_store = store_output_tcs;
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_EVAL:
- bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
ctx->abi.load_tess_varyings = si_nir_load_input_tes;
ctx->abi.load_tess_coord = si_load_tess_coord;
ctx->abi.load_tess_level = si_load_tess_level;
@@ -6061,20 +4903,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_GEOMETRY:
- bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
ctx->abi.load_inputs = si_nir_load_input_gs;
ctx->abi.emit_vertex = si_llvm_emit_vertex;
ctx->abi.emit_primitive = si_llvm_emit_primitive;
ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
- bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
break;
case PIPE_SHADER_FRAGMENT:
- ctx->load_input = declare_input_fs;
ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
ctx->abi.load_sample_position = load_sample_position;
ctx->abi.load_sample_mask_in = load_sample_mask_in;
ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
@@ -6229,7 +5066,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
* and contains a barrier, it will wait there and then
* reach s_endpgm.
*/
- si_llvm_emit_barrier(NULL, bld_base, NULL);
+ si_llvm_emit_barrier(ctx);
}
}
}
@@ -6241,19 +5078,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
ctx->postponed_kill);
}
- if (sel->tokens) {
- if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
- fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
- return false;
- }
- } else {
- bool success = si_nir_build_llvm(ctx, nir);
- if (free_nir)
- ralloc_free(nir);
- if (!success) {
- fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
- return false;
- }
+ bool success = si_nir_build_llvm(ctx, nir);
+ if (free_nir)
+ ralloc_free(nir);
+ if (!success) {
+ fprintf(stderr, "Failed to translate shader from NIR to LLVM\n");
+ return false;
}
si_llvm_build_ret(ctx, ctx->return_value);
@@ -6899,10 +5729,10 @@ static struct nir_shader *get_nir_shader(struct si_shader_selector *sel,
return NULL;
}
-int si_compile_tgsi_shader(struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- struct si_shader *shader,
- struct pipe_debug_callback *debug)
+int si_compile_shader(struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
@@ -6914,16 +5744,12 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
* conversion fails. */
if (si_can_dump_shader(sscreen, sel->type) &&
!(sscreen->debug_flags & DBG(NO_TGSI))) {
- if (sel->tokens)
- tgsi_dump(sel->tokens, 0);
- else
- nir_print_shader(nir, stderr);
+ nir_print_shader(nir, stderr);
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader),
- nir != NULL);
- si_llvm_context_set_ir(&ctx, shader, nir);
+ si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), 64);
+ si_llvm_context_set_ir(&ctx, shader);
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
@@ -6982,7 +5808,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
shader_ls.key.mono = shader->key.mono;
shader_ls.key.opt = shader->key.opt;
shader_ls.is_monolithic = true;
- si_llvm_context_set_ir(&ctx, &shader_ls, nir);
+ si_llvm_context_set_ir(&ctx, &shader_ls);
if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
si_llvm_dispose(&ctx);
@@ -7050,7 +5876,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
shader_es.key.mono = shader->key.mono;
shader_es.key.opt = shader->key.opt;
shader_es.is_monolithic = true;
- si_llvm_context_set_ir(&ctx, &shader_es, nir);
+ si_llvm_context_set_ir(&ctx, &shader_es);
if (!si_compile_tgsi_main(&ctx, nir, free_nir)) {
si_llvm_dispose(&ctx);
@@ -7269,10 +6095,10 @@ si_get_shader_part(struct si_screen *sscreen,
}
struct si_shader_context ctx;
- si_init_shader_ctx(&ctx, sscreen, compiler,
- si_get_wave_size(sscreen, type, shader.key.as_ngg,
- shader.key.as_es),
- false);
+ si_llvm_context_init(&ctx, sscreen, compiler,
+ si_get_wave_size(sscreen, type, shader.key.as_ngg,
+ shader.key.as_es),
+ 64);
ctx.shader = &shader;
ctx.type = type;
@@ -7540,8 +6366,6 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
memset(&ctx->args, 0, sizeof(ctx->args));
if (ctx->screen->info.chip_class >= GFX9) {
@@ -7608,7 +6432,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
for (unsigned i = 0; i < 6; i++)
invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
- si_write_tess_factors(bld_base,
+ si_write_tess_factors(ctx,
ac_get_arg(&ctx->ac, rel_patch_id),
ac_get_arg(&ctx->ac, invocation_id),
ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
@@ -7914,9 +6738,8 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
face = ac_to_integer(&ctx->ac, face);
}
- interp_fs_input(ctx,
- key->ps_prolog.color_attr_index[i],
- TGSI_SEMANTIC_COLOR, i,
+ interp_fs_color(ctx,
+ key->ps_prolog.color_attr_index[i], i,
key->ps_prolog.num_interp_inputs,
key->ps_prolog.colors_read, interp_ij,
prim_mask, face, color);
@@ -7990,7 +6813,6 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int i;
struct si_ps_exports exp = {};
@@ -8060,7 +6882,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
for (i = 0; i < 4; i++)
color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
- si_export_mrt_color(bld_base, color, mrt,
+ si_export_mrt_color(ctx, color, mrt,
ctx->args.arg_count - 1,
mrt == last_color_export, &exp);
}
@@ -8074,7 +6896,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
samplemask = LLVMGetParam(ctx->main_fn, vgpr++);
if (depth || stencil || samplemask)
- si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
+ si_export_mrt_z(ctx, depth, stencil, samplemask, &exp);
else if (last_color_export == -1)
ac_build_export_null(&ctx->ac);
@@ -8240,7 +7062,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
- r = si_compile_tgsi_shader(sscreen, compiler, shader, debug);
+ r = si_compile_shader(sscreen, compiler, shader, debug);
if (r)
return false;
} else {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d9a199bfa3c..30dbe1c6a6e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -326,7 +326,6 @@ struct si_shader_selector {
struct si_shader *gs_copy_shader;
- struct tgsi_token *tokens;
struct nir_shader *nir;
void *nir_binary;
unsigned nir_size;
@@ -730,10 +729,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug);
-int si_compile_tgsi_shader(struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- struct si_shader *shader,
- struct pipe_debug_callback *debug);
+int si_compile_shader(struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug);
bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index a9b40f41b4c..1ec74a84a69 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -26,10 +26,6 @@
#define SI_SHADER_PRIVATE_H
#include "si_shader.h"
-#include "gallivm/lp_bld_flow.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "tgsi/tgsi_parse.h"
#include "ac_shader_abi.h"
#include <llvm-c/Core.h>
@@ -37,12 +33,7 @@
struct pipe_debug_callback;
-#define RADEON_LLVM_MAX_INPUT_SLOTS 32
#define RADEON_LLVM_MAX_INPUTS 32 * 4
-#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-
-#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
-#define RADEON_LLVM_MAX_ADDRS 16
struct si_shader_output_values {
LLVMValueRef values[4];
@@ -52,8 +43,6 @@ struct si_shader_output_values {
};
struct si_shader_context {
- struct lp_build_tgsi_context bld_base;
- struct gallivm_state gallivm;
struct ac_llvm_context ac;
struct si_shader *shader;
struct si_screen *screen;
@@ -69,42 +58,11 @@ struct si_shader_context {
struct ac_shader_args args;
struct ac_shader_abi abi;
- /** This function is responsible for initilizing the inputs array and will be
- * called once for each input declared in the TGSI shader.
- */
- void (*load_input)(struct si_shader_context *,
- unsigned input_index,
- const struct tgsi_full_declaration *decl,
- LLVMValueRef out[4]);
-
- /** This array contains the input values for the shader. Typically these
- * values will be in the form of a target intrinsic that will inform the
- * backend how to load the actual inputs to the shader.
- */
- struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
- LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS];
-
- /** This pointer is used to contain the temporary values.
- * The amount of temporary used in tgsi can't be bound to a max value and
- * thus we must allocate this array at runtime.
- */
- LLVMValueRef *temps;
- unsigned temps_count;
- LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
-
- LLVMValueRef *imms;
- unsigned imms_num;
LLVMBasicBlockRef merged_wrap_if_entry_block;
int merged_wrap_if_label;
- struct tgsi_array_info *temp_arrays;
- LLVMValueRef *temp_array_allocas;
-
- LLVMValueRef undef_alloca;
-
LLVMValueRef main_fn;
LLVMTypeRef return_type;
@@ -234,12 +192,6 @@ struct si_shader_context {
};
static inline struct si_shader_context *
-si_shader_context(struct lp_build_tgsi_context *bld_base)
-{
- return (struct si_shader_context*)bld_base;
-}
-
-static inline struct si_shader_context *
si_shader_context_from_abi(struct ac_shader_abi *abi)
{
struct si_shader_context *ctx = NULL;
@@ -255,12 +207,6 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
struct pipe_debug_callback *debug,
bool less_optimized, unsigned wave_size);
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type);
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type, LLVMValueRef value);
-
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
LLVMValueRef index,
unsigned num);
@@ -271,8 +217,7 @@ void si_llvm_context_init(struct si_shader_context *ctx,
unsigned wave_size,
unsigned ballot_mask_bits);
void si_llvm_context_set_ir(struct si_shader_context *ctx,
- struct si_shader *shader,
- struct nir_shader *nir);
+ struct si_shader *shader);
void si_llvm_create_func(struct si_shader_context *ctx,
const char *name,
@@ -282,18 +227,6 @@ void si_llvm_dispose(struct si_shader_context *ctx);
void si_llvm_optimize_module(struct si_shader_context *ctx);
-LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
- LLVMTypeRef type,
- LLVMValueRef ptr,
- LLVMValueRef ptr2);
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle);
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible);
-
LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
LLVMTypeRef type,
LLVMValueRef vertex_index,
@@ -306,34 +239,10 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
bool is_patch,
bool is_compact,
bool load_input);
-
-LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
- unsigned input_index,
- unsigned vtx_offset_param,
- LLVMTypeRef type,
- unsigned swizzle);
-
LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi,
enum glsl_interp_mode interp,
unsigned location);
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_instruction *inst,
- const struct tgsi_opcode_info *info,
- unsigned index,
- LLVMValueRef dst[4]);
-
-LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- unsigned addr_mul, int rel_index);
-LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
- const struct tgsi_ind_register *ind,
- int rel_index, unsigned num);
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-
-void si_shader_context_init_alu(struct si_shader_context *ctx);
-void si_shader_context_init_mem(struct si_shader_context *ctx);
-
LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
LLVMValueRef list, LLVMValueRef index,
enum ac_descriptor_type type);
@@ -342,14 +251,7 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
enum ac_descriptor_type desc_type,
bool uses_store, bool bindless);
LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi);
-
-void si_load_system_value(struct si_shader_context *ctx,
- unsigned index,
- const struct tgsi_full_declaration *decl);
void si_declare_compute_memory(struct si_shader_context *ctx);
-void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
- const struct tgsi_full_declaration *decl);
-
LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
unsigned swizzle);
void si_llvm_export_vs(struct si_shader_context *ctx,
@@ -365,10 +267,6 @@ void si_llvm_load_input_vs(
struct si_shader_context *ctx,
unsigned input_index,
LLVMValueRef out[4]);
-void si_llvm_load_input_fs(
- struct si_shader_context *ctx,
- unsigned input_index,
- LLVMValueRef out[4]);
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
new file mode 100644
index 00000000000..64ceaf7ed34
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "ac_llvm_util.h"
+#include "util/u_memory.h"
+
+struct si_llvm_diagnostics {
+ struct pipe_debug_callback *debug;
+ unsigned retval;
+};
+
+static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+ struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
+ LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+ const char *severity_str = NULL;
+
+ switch (severity) {
+ case LLVMDSError:
+ severity_str = "error";
+ break;
+ case LLVMDSWarning:
+ severity_str = "warning";
+ break;
+ case LLVMDSRemark:
+ case LLVMDSNote:
+ default:
+ return;
+ }
+
+ char *description = LLVMGetDiagInfoDescription(di);
+
+ pipe_debug_message(diag->debug, SHADER_INFO,
+ "LLVM diagnostic (%s): %s", severity_str, description);
+
+ if (severity == LLVMDSError) {
+ diag->retval = 1;
+ fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
+ }
+
+ LLVMDisposeMessage(description);
+}
+
+/**
+ * Compile an LLVM module to machine code.
+ *
+ * @returns 0 for success, 1 for failure
+ */
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
+ struct ac_llvm_compiler *compiler,
+ struct pipe_debug_callback *debug,
+ bool less_optimized, unsigned wave_size)
+{
+ struct ac_compiler_passes *passes = compiler->passes;
+
+ if (wave_size == 32)
+ passes = compiler->passes_wave32;
+ else if (less_optimized && compiler->low_opt_passes)
+ passes = compiler->low_opt_passes;
+
+ struct si_llvm_diagnostics diag;
+ LLVMContextRef llvm_ctx;
+
+ diag.debug = debug;
+ diag.retval = 0;
+
+ /* Set up the diagnostic handler. */
+ llvm_ctx = LLVMGetModuleContext(M);
+
+ LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
+
+ /* Compile IR. */
+ if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
+ &binary->elf_size))
+ diag.retval = 1;
+
+ if (diag.retval != 0)
+ pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
+ return diag.retval;
+}
+
+void si_shader_binary_clean(struct si_shader_binary *binary)
+{
+ free((void *)binary->elf_buffer);
+ binary->elf_buffer = NULL;
+
+ free(binary->llvm_ir_string);
+ binary->llvm_ir_string = NULL;
+}
+
+void si_llvm_context_init(struct si_shader_context *ctx,
+ struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler,
+ unsigned wave_size,
+ unsigned ballot_mask_bits)
+{
+ /* Initialize the shader context:
+ * Zero the whole struct, record the screen and compiler, and set up the
+ * ac_llvm_context in ctx->ac. The LLVM types and constants cached below
+ * are all created in ctx->ac.context.
+ */
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->screen = sscreen;
+ ctx->compiler = compiler;
+
+ ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
+ sscreen->info.family,
+ AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
+ wave_size, ballot_mask_bits);
+
+ ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
+ ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
+ ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
+ ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
+ ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
+ ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
+ ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+ ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+ ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+ ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
+ ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
+ ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
+ ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
+}
+
+/* Set the context to a certain shader. Can be called repeatedly
+ * to change the shader. */
+void si_llvm_context_set_ir(struct si_shader_context *ctx,
+ struct si_shader *shader)
+{
+ struct si_shader_selector *sel = shader->selector;
+ const struct tgsi_shader_info *info = &sel->info;
+
+ ctx->shader = shader;
+ ctx->type = sel->type;
+
+ ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
+ ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
+
+ ctx->num_samplers = util_last_bit(info->samplers_declared);
+ ctx->num_images = util_last_bit(info->images_declared);
+}
+
+void si_llvm_create_func(struct si_shader_context *ctx,
+ const char *name,
+ LLVMTypeRef *return_types, unsigned num_return_elems)
+{
+ LLVMTypeRef ret_type;
+ enum ac_llvm_calling_convention call_conv;
+ enum pipe_shader_type real_shader_type;
+
+ if (num_return_elems)
+ ret_type = LLVMStructTypeInContext(ctx->ac.context,
+ return_types,
+ num_return_elems, true);
+ else
+ ret_type = ctx->voidt;
+
+ real_shader_type = ctx->type;
+
+ /* LS is merged into HS (TCS), and ES is merged into GS. */
+ if (ctx->screen->info.chip_class >= GFX9) {
+ if (ctx->shader->key.as_ls)
+ real_shader_type = PIPE_SHADER_TESS_CTRL;
+ else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
+ real_shader_type = PIPE_SHADER_GEOMETRY;
+ }
+
+ switch (real_shader_type) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_TESS_EVAL:
+ call_conv = AC_LLVM_AMDGPU_VS;
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ call_conv = AC_LLVM_AMDGPU_HS;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ call_conv = AC_LLVM_AMDGPU_GS;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ call_conv = AC_LLVM_AMDGPU_PS;
+ break;
+ case PIPE_SHADER_COMPUTE:
+ call_conv = AC_LLVM_AMDGPU_CS;
+ break;
+ default:
+ unreachable("Unhandle shader type");
+ }
+
+ /* Setup the function */
+ ctx->return_type = ret_type;
+ ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
+ ret_type, ctx->ac.module);
+}
+
+void si_llvm_optimize_module(struct si_shader_context *ctx)
+{
+ /* Dump LLVM IR before any optimization passes */
+ if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
+ si_can_dump_shader(ctx->screen, ctx->type))
+ LLVMDumpModule(ctx->ac.module);
+
+ /* Run the pass */
+ LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
+ LLVMDisposeBuilder(ctx->ac.builder);
+}
+
+void si_llvm_dispose(struct si_shader_context *ctx)
+{
+ LLVMDisposeModule(ctx->ac.module);
+ LLVMContextDispose(ctx->ac.context);
+ ac_llvm_context_dispose(&ctx->ac);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c
new file mode 100644
index 00000000000..e3625214258
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <llvm/Config/llvm-config.h>
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "sid.h"
+#include "ac_llvm_util.h"
+
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ */
+LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
+ LLVMValueRef index,
+ unsigned num)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+ LLVMValueRef cc;
+
+ if (util_is_power_of_two_or_zero(num)) {
+ index = LLVMBuildAnd(builder, index, c_max, "");
+ } else {
+ /* In theory, this MAX pattern should result in code that is
+ * as good as the bit-wise AND above.
+ *
+ * In practice, LLVM generates worse code (at the time of
+ * writing), because its value tracking is not strong enough.
+ */
+ cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+ index = LLVMBuildSelect(builder, cc, index, c_max, "");
+ }
+
+ return index;
+}
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with DCC enabled and
+ * non-trivial can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+ LLVMValueRef rsrc)
+{
+ if (ctx->screen->info.chip_class <= GFX7) {
+ return rsrc;
+ } else {
+ LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+ LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+ LLVMValueRef tmp;
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+ tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+ return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
+ }
+}
+
+/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
+ * adjust "index" to point to FMASK. */
+LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool uses_store, bool bindless)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef rsrc;
+
+ if (desc_type == AC_DESC_BUFFER) {
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+ ctx->i32_1);
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ } else {
+ assert(desc_type == AC_DESC_IMAGE ||
+ desc_type == AC_DESC_FMASK);
+ }
+
+ if (bindless)
+ rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+ else
+ rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
+ if (desc_type == AC_DESC_IMAGE && uses_store)
+ rsrc = force_dcc_off(ctx, rsrc);
+ return rsrc;
+}
+
+/**
+ * Load an image view, fmask view, or sampler state descriptor.
+ */
+LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type type)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+
+ switch (type) {
+ case AC_DESC_IMAGE:
+ /* The image is at [0:7]. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+ break;
+ case AC_DESC_BUFFER:
+ /* The buffer is in [4:7]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+ ctx->i32_1);
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ break;
+ case AC_DESC_FMASK:
+ /* The FMASK is at [8:15]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+ ctx->i32_1);
+ break;
+ case AC_DESC_SAMPLER:
+ /* The sampler state is at [12:15]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+ LLVMConstInt(ctx->i32, 3, 0));
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ break;
+ case AC_DESC_PLANE_0:
+ case AC_DESC_PLANE_1:
+ case AC_DESC_PLANE_2:
+ /* Only used for the multiplane image support for Vulkan. Should
+ * never be reached in radeonsi.
+ */
+ unreachable("Plane descriptor requested in radeonsi.");
+ }
+
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct ac_image_args args = {};
+ LLVMValueRef ptr, image, fmask;
+
+ /* Ignore src0, because KHR_blend_func_extended disallows multiple render
+ * targets.
+ */
+
+ /* Load the image descriptor. */
+ STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+ ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
+ ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
+ ac_array_in_const32_addr_space(ctx->v8i32), "");
+ image = ac_build_load_to_sgpr(&ctx->ac, ptr,
+ LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+ unsigned chan = 0;
+
+ args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
+
+ if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+ args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
+
+ /* Get the current render target layer index. */
+ if (ctx->shader->key.mono.u.ps.fbfetch_layered)
+ args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
+
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+ args.coords[chan++] = si_get_sample_id(ctx);
+
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
+ !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
+ fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
+ LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
+
+ ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
+ ctx->shader->key.mono.u.ps.fbfetch_layered);
+ }
+
+ args.opcode = ac_image_load;
+ args.resource = image;
+ args.dmask = 0xf;
+ args.attributes = AC_FUNC_ATTR_READNONE;
+
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_2darraymsaa : ac_image_2dmsaa;
+ else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_1darray : ac_image_1d;
+ else
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_2darray : ac_image_2d;
+
+ return ac_build_image_opcode(&ctx->ac, &args);
+}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
deleted file mode 100644
index 4be410ec331..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ /dev/null
@@ -1,834 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-
-void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- LLVMBuilderRef builder = ctx->ac.builder;
-
- if (ctx->shader->selector->force_correct_derivs_after_kill) {
- /* Kill immediately while maintaining WQM. */
- ac_build_kill_if_false(&ctx->ac,
- ac_build_wqm_vote(&ctx->ac, visible));
-
- LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
- mask = LLVMBuildAnd(builder, mask, visible, "");
- LLVMBuildStore(builder, mask, ctx->postponed_kill);
- return;
- }
-
- ac_build_kill_if_false(&ctx->ac, visible);
-}
-
-static void kil_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef visible;
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
- const struct tgsi_full_instruction *inst = emit_data->inst;
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- unsigned i;
- LLVMValueRef conds[TGSI_NUM_CHANNELS];
-
- for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
- LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
- /* UGE because NaN shouldn't get killed */
- conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
- ctx->ac.f32_0, "");
- }
-
- /* And the conditions together */
- for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
- conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], "");
- }
- visible = conds[0];
- } else {
- assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
- visible = ctx->i1false;
- }
-
- si_llvm_emit_kill(&ctx->abi, visible);
-}
-
-static void emit_icmp(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- unsigned pred;
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_USEQ:
- case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
- case TGSI_OPCODE_USNE:
- case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
- case TGSI_OPCODE_USGE:
- case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
- case TGSI_OPCODE_USLT:
- case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
- case TGSI_OPCODE_ISGE:
- case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
- case TGSI_OPCODE_ISLT:
- case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
- default:
- assert(!"unknown instruction");
- pred = 0;
- break;
- }
-
- LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred,
- emit_data->args[0], emit_data->args[1],"");
-
- v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
- emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_ucmp(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);
-
- LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0,
- ctx->i32_0, "");
-
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], "");
-}
-
-static void emit_cmp(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef cond, *args = emit_data->args;
-
- cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0],
- ctx->ac.f32_0, "");
-
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], "");
-}
-
-static void emit_set_cond(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMRealPredicate pred;
- LLVMValueRef cond;
-
- /* Use ordered for everything but NE (which is usual for
- * float comparisons)
- */
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
- case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
- case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
- case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
- case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
- case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
- default: assert(!"unknown instruction"); pred = 0; break;
- }
-
- cond = LLVMBuildFCmp(ctx->ac.builder,
- pred, emit_data->args[0], emit_data->args[1], "");
-
- emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder,
- cond, ctx->ac.f32_1, ctx->ac.f32_0, "");
-}
-
-static void emit_fcmp(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMRealPredicate pred;
-
- /* Use ordered for everything but NE (which is usual for
- * float comparisons)
- */
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
- case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
- case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
- case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
- default: assert(!"unknown instruction"); pred = 0; break;
- }
-
- LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
- emit_data->args[0], emit_data->args[1],"");
-
- v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
- emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_dcmp(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMRealPredicate pred;
-
- /* Use ordered for everything but NE (which is usual for
- * float comparisons)
- */
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
- case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
- case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
- case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
- default: assert(!"unknown instruction"); pred = 0; break;
- }
-
- LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred,
- emit_data->args[0], emit_data->args[1],"");
-
- v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, "");
-
- emit_data->output[emit_data->chan] = v;
-}
-
-static void emit_not(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
- emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, "");
-}
-
-static void emit_arl(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef floor_index =
- ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32,
- &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
- emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
- floor_index, ctx->i32, "");
-}
-
-static void emit_and(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_or(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_uadd(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_udiv(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_idiv(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_mod(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_umod(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_shl(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ushr(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-static void emit_ishr(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_xor(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder,
- emit_data->args[0], emit_data->args[1], "");
-}
-
-static void emit_ssg(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMValueRef val;
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
- val = ac_build_isign(&ctx->ac, emit_data->args[0], 64);
- } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
- val = ac_build_isign(&ctx->ac, emit_data->args[0], 32);
- } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) {
- val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64);
- } else {
- val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32);
- }
-
- emit_data->output[emit_data->chan] = val;
-}
-
-static void emit_ineg(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder,
- emit_data->args[0], "");
-}
-
-static void emit_dneg(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildFNeg(ctx->ac.builder,
- emit_data->args[0], "");
-}
-
-static void emit_frac(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned bitsize;
-
- if (emit_data->info->opcode == TGSI_OPCODE_FRC)
- bitsize = 32;
- else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
- bitsize = 64;
- else {
- assert(0);
- return;
- }
-
- emit_data->output[emit_data->chan] =
- ac_build_fract(&ctx->ac, emit_data->args[0], bitsize);
-}
-
-static void emit_f2i(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder,
- emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_f2u(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder,
- emit_data->args[0], ctx->i32, "");
-}
-
-static void emit_i2f(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder,
- emit_data->args[0], ctx->f32, "");
-}
-
-static void emit_u2f(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder,
- emit_data->args[0], ctx->f32, "");
-}
-
-static void
-build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] =
- ac_build_intrinsic(&ctx->ac, action->intr_name,
- emit_data->dst_type, emit_data->args,
- emit_data->arg_count, AC_FUNC_ATTR_READNONE);
-}
-
-static void emit_bfi(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef bfi_args[3];
- LLVMValueRef bfi_sm5;
- LLVMValueRef cond;
-
- // Calculate the bitmask: (((1 << src3) - 1) << src2
- bfi_args[0] = LLVMBuildShl(builder,
- LLVMBuildSub(builder,
- LLVMBuildShl(builder,
- ctx->i32_1,
- emit_data->args[3], ""),
- ctx->i32_1, ""),
- emit_data->args[2], "");
-
- bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
- emit_data->args[2], "");
-
- bfi_args[2] = emit_data->args[0];
-
- /* Calculate:
- * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
- * Use the right-hand side, which the LLVM backend can convert to V_BFI.
- */
- bfi_sm5 =
- LLVMBuildXor(builder, bfi_args[2],
- LLVMBuildAnd(builder, bfi_args[0],
- LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
- ""), ""), "");
-
- /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
- * uses the convenient V_BFI lowering for the above, which follows SM5
- * and disagrees with GLSL semantics when bits (src3) is 32.
- */
- cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
- LLVMConstInt(ctx->i32, 32, 0), "");
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
-}
-
-static void emit_bfe(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- /* FIXME: LLVM 7 returns incorrect result when count is 0.
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276
- */
- LLVMValueRef zero = ctx->i32_0;
- LLVMValueRef bfe_sm5 =
- ac_build_bfe(&ctx->ac, emit_data->args[0],
- emit_data->args[1], emit_data->args[2],
- emit_data->info->opcode == TGSI_OPCODE_IBFE);
-
- /* Correct for GLSL semantics. */
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
- LLVMConstInt(ctx->i32, 32, 0), "");
- LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
- zero, "");
- bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
-}
-
-/* this is ffs in C */
-static void emit_lsb(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]);
-}
-
-/* Find the last bit set. */
-static void emit_umsb(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- emit_data->output[emit_data->chan] =
- ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type);
-}
-
-/* Find the last bit opposite of the sign bit. */
-static void emit_imsb(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- emit_data->output[emit_data->chan] =
- ac_build_imsb(&ctx->ac, emit_data->args[0],
- emit_data->dst_type);
-}
-
-static void emit_iabs(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- emit_data->output[emit_data->chan] =
- ac_build_imax(&ctx->ac, emit_data->args[0],
- LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], ""));
-}
-
-static void emit_minmax_int(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMIntPredicate op;
-
- switch (emit_data->info->opcode) {
- default:
- assert(0);
- case TGSI_OPCODE_IMAX:
- case TGSI_OPCODE_I64MAX:
- op = LLVMIntSGT;
- break;
- case TGSI_OPCODE_IMIN:
- case TGSI_OPCODE_I64MIN:
- op = LLVMIntSLT;
- break;
- case TGSI_OPCODE_UMAX:
- case TGSI_OPCODE_U64MAX:
- op = LLVMIntUGT;
- break;
- case TGSI_OPCODE_UMIN:
- case TGSI_OPCODE_U64MIN:
- op = LLVMIntULT;
- break;
- }
-
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder,
- LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0],
- emit_data->args[1], ""),
- emit_data->args[0],
- emit_data->args[1], "");
-}
-
-static void emit_pk2h(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef v[] = {
- lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X),
- lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y),
- };
-
-
- /* From the GLSL 4.50 spec:
- * "The rounding mode cannot be set and is undefined."
- *
- * v_cvt_pkrtz_f16 rounds to zero, but it's fastest.
- */
- emit_data->output[emit_data->chan] =
- LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v),
- ctx->i32, "");
-}
-
-static void emit_up2h(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMTypeRef i16;
- LLVMValueRef const16, input, val;
- unsigned i;
-
- i16 = LLVMInt16TypeInContext(ctx->ac.context);
- const16 = LLVMConstInt(ctx->i32, 16, 0);
- input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
- for (i = 0; i < 2; i++) {
- val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input;
- val = LLVMBuildTrunc(ctx->ac.builder, val, i16, "");
- val = ac_to_float(&ctx->ac, val);
- emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, "");
- }
-}
-
-static void emit_fdiv(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- emit_data->output[emit_data->chan] =
- ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]);
-}
-
-/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
- * the target machine. f64 needs global unsafe math flags to get rsq. */
-static void emit_rsq(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- LLVMValueRef sqrt =
- ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32,
- &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE);
-
- emit_data->output[emit_data->chan] =
- ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt);
-}
-
-static void dfracexp_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
-
- emit_data->output[emit_data->chan] =
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
- ctx->ac.f64, &in, 1, 0);
- emit_data->output1[emit_data->chan] =
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
- ctx->ac.i32, &in, 1, 0);
-}
-
-void si_shader_context_init_alu(struct si_shader_context *ctx)
-{
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
- lp_set_default_actions(bld_base);
-
- bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
- bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
- bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
- bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32";
- bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
- bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
- bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
- bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
- bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64";
- bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64";
- bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
- bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
- bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
- bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
- bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64";
- bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
- bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
- bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
- bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
- bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64";
- bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
- bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64";
- bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit;
- bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64";
- bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
- bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
-
- /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
- if (ctx->screen->info.chip_class >= GFX10) {
- bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
- } else {
- bld_base->op_actions[TGSI_OPCODE_FMA].emit =
- bld_base->op_actions[TGSI_OPCODE_MAD].emit;
- }
-
- bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
- bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
- bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
- bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
- bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
- bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
- bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
- bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
- bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
- bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
- bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
- bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
- bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
- bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
- bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
- bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit;
- bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32";
- bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
- bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
- bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
- bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
- bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
- bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
- bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
- bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
- bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
- bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
- bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
- bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
- bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
- bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
- bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
- bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
- bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
- bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
- bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
- bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
- bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
- bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
- bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
- bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
- bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
- bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
- bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
-
- bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
- bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
- bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
-
- bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
- bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
-
- bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
- bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
- bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
- bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
-
- bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
- bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
- bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
- bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
deleted file mode 100644
index 21b861b8244..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ /dev/null
@@ -1,1852 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <llvm/Config/llvm-config.h>
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "sid.h"
-#include "tgsi/tgsi_build.h"
-#include "tgsi/tgsi_util.h"
-#include "ac_llvm_util.h"
-
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data,
- LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
- LLVMValueRef *fmask_ptr);
-
-/**
- * Given a v8i32 resource descriptor for a buffer, extract the size of the
- * buffer in number of elements and return it as an i32.
- */
-static LLVMValueRef get_buffer_size(
- struct lp_build_tgsi_context *bld_base,
- LLVMValueRef descriptor)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef size =
- LLVMBuildExtractElement(builder, descriptor,
- LLVMConstInt(ctx->i32, 2, 0), "");
-
- if (ctx->screen->info.chip_class == GFX8) {
- /* On GFX8, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(builder, descriptor,
- ctx->i32_1, "");
- stride = LLVMBuildLShr(builder, stride,
- LLVMConstInt(ctx->i32, 16, 0), "");
- stride = LLVMBuildAnd(builder, stride,
- LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
-
- size = LLVMBuildUDiv(builder, size, stride, "");
- }
-
- return size;
-}
-
-static LLVMValueRef
-shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
- const struct tgsi_full_src_register *reg,
- bool ubo)
-{
- LLVMValueRef index;
-
- if (!reg->Register.Indirect) {
- index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
- } else {
- index = si_get_indirect_index(ctx, &reg->Indirect,
- 1, reg->Register.Index);
- }
-
- if (ubo)
- return ctx->abi.load_ubo(&ctx->abi, index);
- else
- return ctx->abi.load_ssbo(&ctx->abi, index, false);
-}
-
-static enum ac_image_dim
-ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
- switch (target) {
- case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
- if (screen->info.chip_class == GFX9)
- return ac_image_2d;
- return ac_image_1d;
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOWRECT:
- return ac_image_2d;
- case TGSI_TEXTURE_3D:
- return ac_image_3d;
- case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_CUBE_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- return ac_image_cube;
- case TGSI_TEXTURE_1D_ARRAY:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- if (screen->info.chip_class == GFX9)
- return ac_image_2darray;
- return ac_image_1darray;
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- return ac_image_2darray;
- case TGSI_TEXTURE_2D_MSAA:
- return ac_image_2dmsaa;
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- return ac_image_2darraymsaa;
- default:
- unreachable("unhandled texture type");
- }
-}
-
-static enum ac_image_dim
-ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
-{
- enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
-
- /* Match the resource type set in the descriptor. */
- if (dim == ac_image_cube ||
- (screen->info.chip_class <= GFX8 && dim == ac_image_3d))
- dim = ac_image_2darray;
- else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) {
- /* When a single layer of a 3D texture is bound, the shader
- * will refer to a 2D target, but the descriptor has a 3D type.
- * Since the HW ignores BASE_ARRAY in this case, we need to
- * send 3 coordinates. This doesn't hurt when the underlying
- * texture is non-3D.
- */
- dim = ac_image_3d;
- }
-
- return dim;
-}
-
-/**
- * Given a 256-bit resource descriptor, force the DCC enable bit to off.
- *
- * At least on Tonga, executing image stores on images with DCC enabled and
- * non-trivial can eventually lead to lockups. This can occur when an
- * application binds an image as read-only but then uses a shader that writes
- * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
- * program termination) in this case, but it doesn't cost much to be a bit
- * nicer: disabling DCC in the shader still leads to undefined results but
- * avoids the lockup.
- */
-static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
- LLVMValueRef rsrc)
-{
- if (ctx->screen->info.chip_class <= GFX7) {
- return rsrc;
- } else {
- LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
- LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
- LLVMValueRef tmp;
-
- tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
- tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
- return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
- }
-}
-
-/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
- * adjust "index" to point to FMASK. */
-LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool uses_store, bool bindless)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef rsrc;
-
- if (desc_type == AC_DESC_BUFFER) {
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
- ctx->i32_1);
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- } else {
- assert(desc_type == AC_DESC_IMAGE ||
- desc_type == AC_DESC_FMASK);
- }
-
- if (bindless)
- rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
- else
- rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
-
- if (desc_type == AC_DESC_IMAGE && uses_store)
- rsrc = force_dcc_off(ctx, rsrc);
- return rsrc;
-}
-
-/**
- * Load the resource descriptor for \p image.
- */
-static void
-image_fetch_rsrc(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *image,
- bool fmask, bool is_store, unsigned target,
- LLVMValueRef *rsrc)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- bool bindless = image->Register.File != TGSI_FILE_IMAGE;
- LLVMValueRef rsrc_ptr, index;
-
- if (bindless) {
- /* Bindless descriptors are accessible from a different pair of
- * user SGPR indices.
- */
- rsrc_ptr = ac_get_arg(&ctx->ac,
- ctx->bindless_samplers_and_images);
- index = lp_build_emit_fetch_src(bld_base, image, TGSI_TYPE_UNSIGNED, 0);
-
- /* Bindless image descriptors use 16-dword slots. */
- index = LLVMBuildMul(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, 2, 0), "");
- /* FMASK is right after the image. */
- if (fmask)
- index = LLVMBuildAdd(ctx->ac.builder, index, ctx->i32_1, "");
- } else {
- rsrc_ptr = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
-
- if (!image->Register.Indirect) {
- index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
- } else {
- /* From the GL_ARB_shader_image_load_store extension spec:
- *
- * If a shader performs an image load, store, or atomic
- * operation using an image variable declared as an array,
- * and if the index used to select an individual element is
- * negative or greater than or equal to the size of the
- * array, the results of the operation are undefined but may
- * not lead to termination.
- */
- index = si_get_bounded_indirect_index(ctx, &image->Indirect,
- image->Register.Index,
- ctx->num_images);
- }
- /* FMASKs are separate from images. */
- if (fmask) {
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
- }
- index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
- index, "");
- }
-
- *rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
- fmask ? AC_DESC_FMASK :
- target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
- is_store, bindless);
-}
-
-static void image_fetch_coords(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_instruction *inst,
- unsigned src, LLVMValueRef desc,
- LLVMValueRef *coords)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- unsigned target = inst->Memory.Texture;
- unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
- LLVMValueRef tmp;
- int chan;
-
- for (chan = 0; chan < num_coords; ++chan) {
- tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
- tmp = ac_to_integer(&ctx->ac, tmp);
- coords[chan] = tmp;
- }
-
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- /* Need the sample index as well. */
- tmp = lp_build_emit_fetch(bld_base, inst, src, TGSI_SWIZZLE_W);
- coords[chan] = ac_to_integer(&ctx->ac, tmp);
- }
-
- if (ctx->screen->info.chip_class == GFX9) {
- /* 1D textures are allocated and used as 2D on GFX9. */
- if (target == TGSI_TEXTURE_1D) {
- coords[1] = ctx->i32_0;
- } else if (target == TGSI_TEXTURE_1D_ARRAY) {
- coords[2] = coords[1];
- coords[1] = ctx->i32_0;
- } else if (target == TGSI_TEXTURE_2D) {
- /* The hw can't bind a slice of a 3D image as a 2D
- * image, because it ignores BASE_ARRAY if the target
- * is 3D. The workaround is to read BASE_ARRAY and set
- * it as the 3rd address operand for all 2D images.
- */
- LLVMValueRef first_layer, const5, mask;
-
- const5 = LLVMConstInt(ctx->i32, 5, 0);
- mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
- first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
- first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
-
- coords[2] = first_layer;
- }
- }
-}
-
-static unsigned get_cache_policy(struct si_shader_context *ctx,
- const struct tgsi_full_instruction *inst,
- bool atomic, bool may_store_unaligned,
- bool writeonly_memory)
-{
- unsigned cache_policy = 0;
-
- if (!atomic &&
- /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.
- * All store opcodes not aligned to a dword are affected.
- * The only way to get unaligned stores in radeonsi is through
- * shader images. */
- ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) ||
- /* If this is write-only, don't keep data in L1 to prevent
- * evicting L1 cache lines that may be needed by other
- * instructions. */
- writeonly_memory ||
- inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) {
- cache_policy |= ac_glc;
- }
-
- if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc;
-
- return cache_policy;
-}
-
-static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
- const struct tgsi_full_instruction *inst,
- LLVMTypeRef type, int arg)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef offset, ptr;
- int addr_space;
-
- offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
- offset = ac_to_integer(&ctx->ac, offset);
-
- ptr = ctx->ac.lds;
- ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
- addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
-
- return ptr;
-}
-
-static void load_emit_memory(
- struct si_shader_context *ctx,
- struct lp_build_emit_data *emit_data)
-{
- const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned writemask = inst->Dst[0].Register.WriteMask;
- LLVMValueRef channels[4], ptr, derived_ptr, index;
- int chan;
-
- ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
-
- for (chan = 0; chan < 4; ++chan) {
- if (!(writemask & (1 << chan))) {
- channels[chan] = LLVMGetUndef(ctx->f32);
- continue;
- }
-
- index = LLVMConstInt(ctx->i32, chan, 0);
- derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
- channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
- }
- emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4);
-}
-
-/**
- * Return true if the memory accessed by a LOAD or STORE instruction is
- * read-only or write-only, respectively.
- *
- * \param shader_buffers_reverse_access_mask
- * For LOAD, set this to (store | atomic) slot usage in the shader.
- * For STORE, set this to (load | atomic) slot usage in the shader.
- * \param images_reverse_access_mask Same as above, but for images.
- * \param bindless_buffer_reverse_access_mask Same as above, but for bindless image buffers.
- * \param bindless_image_reverse_access_mask Same as above, but for bindless images.
- */
-static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
- const struct tgsi_shader_info *info,
- unsigned shader_buffers_reverse_access_mask,
- unsigned images_reverse_access_mask,
- bool bindless_buffer_reverse_access_mask,
- bool bindless_image_reverse_access_mask)
-{
- enum tgsi_file_type resource_file;
- unsigned resource_index;
- bool resource_indirect;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
- resource_file = inst->Dst[0].Register.File;
- resource_index = inst->Dst[0].Register.Index;
- resource_indirect = inst->Dst[0].Register.Indirect;
- } else {
- resource_file = inst->Src[0].Register.File;
- resource_index = inst->Src[0].Register.Index;
- resource_indirect = inst->Src[0].Register.Indirect;
- }
-
- assert(resource_file == TGSI_FILE_BUFFER ||
- resource_file == TGSI_FILE_IMAGE ||
- /* bindless image */
- resource_file == TGSI_FILE_INPUT ||
- resource_file == TGSI_FILE_OUTPUT ||
- resource_file == TGSI_FILE_CONSTANT ||
- resource_file == TGSI_FILE_TEMPORARY ||
- resource_file == TGSI_FILE_IMMEDIATE);
-
- assert(resource_file != TGSI_FILE_BUFFER ||
- inst->Memory.Texture == TGSI_TEXTURE_BUFFER);
-
- bool bindless = resource_file != TGSI_FILE_BUFFER &&
- resource_file != TGSI_FILE_IMAGE;
-
- /* RESTRICT means NOALIAS.
- * If there are no writes, we can assume the accessed memory is read-only.
- * If there are no reads, we can assume the accessed memory is write-only.
- */
- if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) {
- unsigned reverse_access_mask;
-
- if (resource_file == TGSI_FILE_BUFFER) {
- reverse_access_mask = shader_buffers_reverse_access_mask;
- } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
- reverse_access_mask = info->images_buffers &
- images_reverse_access_mask;
- } else {
- reverse_access_mask = ~info->images_buffers &
- images_reverse_access_mask;
- }
-
- if (resource_indirect) {
- if (!reverse_access_mask)
- return true;
- } else {
- if (!(reverse_access_mask &
- (1u << resource_index)))
- return true;
- }
- }
-
- /* If there are no buffer writes (for both shader buffers & image
- * buffers), it implies that buffer memory is read-only.
- * If there are no buffer reads (for both shader buffers & image
- * buffers), it implies that buffer memory is write-only.
- *
- * Same for the case when there are no writes/reads for non-buffer
- * images.
- */
- if (resource_file == TGSI_FILE_BUFFER ||
- inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
- if (!shader_buffers_reverse_access_mask &&
- !(info->images_buffers & images_reverse_access_mask) &&
- !bindless_buffer_reverse_access_mask)
- return true;
- } else {
- if (!(~info->images_buffers & images_reverse_access_mask) &&
- !bindless_image_reverse_access_mask)
- return true;
- }
- return false;
-}
-
-static void load_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction * inst = emit_data->inst;
- const struct tgsi_shader_info *info = &ctx->shader->selector->info;
- bool can_speculate = false;
- LLVMValueRef vindex = ctx->i32_0;
- LLVMValueRef voffset = ctx->i32_0;
- struct ac_image_args args = {};
-
- if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
- load_emit_memory(ctx, emit_data);
- return;
- }
-
- if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
- inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
- bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
- args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
- voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
- } else {
- unsigned target = inst->Memory.Texture;
-
- image_fetch_rsrc(bld_base, &inst->Src[0], false, false, target, &args.resource);
- image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
-
- if ((inst->Memory.Texture == TGSI_TEXTURE_2D_MSAA ||
- inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
- !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
- LLVMValueRef fmask;
-
- image_fetch_rsrc(bld_base, &inst->Src[0], true, false, target, &fmask);
- ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
- inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
- }
- vindex = args.coords[0]; /* for buffers only */
- }
-
- if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
- emit_data->output[emit_data->chan] =
- ac_build_buffer_load(&ctx->ac, args.resource,
- util_last_bit(inst->Dst[0].Register.WriteMask),
- NULL, voffset, NULL, 0, 0, true, true);
- return;
- }
-
- if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
- can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
- is_oneway_access_only(inst, info,
- info->shader_buffers_store |
- info->shader_buffers_atomic,
- info->images_store |
- info->images_atomic,
- info->uses_bindless_buffer_store |
- info->uses_bindless_buffer_atomic,
- info->uses_bindless_image_store |
- info->uses_bindless_image_atomic);
- args.cache_policy = get_cache_policy(ctx, inst, false, false, false);
-
- if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
- /* Don't use SMEM for shader buffer loads, because LLVM doesn't
- * select SMEM for SI.load.const with a non-constant offset, and
- * constant offsets practically don't exist with shader buffers.
- *
- * Also, SI.load.const doesn't use inst_offset when it's lowered
- * to VMEM, so we just end up with more VALU instructions in the end
- * and no benefit.
- *
- * TODO: Remove this line once LLVM can select SMEM with a non-constant
- * offset, and can derive inst_offset when VMEM is selected.
- * After that, si_memory_barrier should invalidate sL1 for shader
- * buffers.
- */
- emit_data->output[emit_data->chan] =
- ac_build_buffer_load(&ctx->ac, args.resource,
- util_last_bit(inst->Dst[0].Register.WriteMask),
- NULL, voffset, NULL, 0,
- args.cache_policy, can_speculate, false);
- return;
- }
-
- if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
- unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
- LLVMValueRef result =
- ac_build_buffer_load_format(&ctx->ac,
- args.resource,
- vindex,
- ctx->i32_0,
- num_channels,
- args.cache_policy,
- can_speculate);
- emit_data->output[emit_data->chan] =
- ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
- } else {
- args.opcode = ac_image_load;
- args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
- args.attributes = ac_get_load_intr_attribs(can_speculate);
- args.dmask = 0xf;
-
- emit_data->output[emit_data->chan] =
- ac_build_image_opcode(&ctx->ac, &args);
- }
-}
-
-static void store_emit_buffer(struct si_shader_context *ctx,
- LLVMValueRef resource,
- unsigned writemask,
- LLVMValueRef value,
- LLVMValueRef voffset,
- unsigned cache_policy,
- bool writeonly_memory)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef base_data = value;
- LLVMValueRef base_offset = voffset;
-
- while (writemask) {
- int start, count;
- LLVMValueRef data, voff;
-
- u_bit_scan_consecutive_range(&writemask, &start, &count);
-
- if (count == 3 && ac_has_vec3_support(ctx->ac.chip_class, false)) {
- LLVMValueRef values[3] = {
- LLVMBuildExtractElement(builder, base_data,
- LLVMConstInt(ctx->i32, start, 0), ""),
- LLVMBuildExtractElement(builder, base_data,
- LLVMConstInt(ctx->i32, start + 1, 0), ""),
- LLVMBuildExtractElement(builder, base_data,
- LLVMConstInt(ctx->i32, start + 2, 0), ""),
- };
- data = ac_build_gather_values(&ctx->ac, values, 3);
- } else if (count >= 3) {
- data = base_data;
- } else if (count == 2) {
- LLVMValueRef values[2] = {
- LLVMBuildExtractElement(builder, base_data,
- LLVMConstInt(ctx->i32, start, 0), ""),
- LLVMBuildExtractElement(builder, base_data,
- LLVMConstInt(ctx->i32, start + 1, 0), ""),
- };
-
- data = ac_build_gather_values(&ctx->ac, values, 2);
- } else {
- assert(count == 1);
- data = LLVMBuildExtractElement(
- builder, base_data,
- LLVMConstInt(ctx->i32, start, 0), "");
- }
-
- voff = base_offset;
- if (start != 0) {
- voff = LLVMBuildAdd(
- builder, voff,
- LLVMConstInt(ctx->i32, start * 4, 0), "");
- }
-
- ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
- voff, ctx->i32_0, 0, cache_policy);
- }
-}
-
-static void store_emit_memory(
- struct si_shader_context *ctx,
- struct lp_build_emit_data *emit_data)
-{
- const struct tgsi_full_instruction *inst = emit_data->inst;
- LLVMBuilderRef builder = ctx->ac.builder;
- unsigned writemask = inst->Dst[0].Register.WriteMask;
- LLVMValueRef ptr, derived_ptr, data, index;
- int chan;
-
- ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
-
- for (chan = 0; chan < 4; ++chan) {
- if (!(writemask & (1 << chan))) {
- continue;
- }
- data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
- index = LLVMConstInt(ctx->i32, chan, 0);
- derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
- LLVMBuildStore(builder, data, derived_ptr);
- }
-}
-
-static void store_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction * inst = emit_data->inst;
- const struct tgsi_shader_info *info = &ctx->shader->selector->info;
- struct tgsi_full_src_register resource_reg =
- tgsi_full_src_register_from_dst(&inst->Dst[0]);
- unsigned target = inst->Memory.Texture;
-
- if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
- store_emit_memory(ctx, emit_data);
- return;
- }
-
- bool writeonly_memory = is_oneway_access_only(inst, info,
- info->shader_buffers_load |
- info->shader_buffers_atomic,
- info->images_load |
- info->images_atomic,
- info->uses_bindless_buffer_load |
- info->uses_bindless_buffer_atomic,
- info->uses_bindless_image_load |
- info->uses_bindless_image_atomic);
- LLVMValueRef chans[4];
- LLVMValueRef vindex = ctx->i32_0;
- LLVMValueRef voffset = ctx->i32_0;
- struct ac_image_args args = {};
-
- for (unsigned chan = 0; chan < 4; ++chan)
- chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
-
- if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
- args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
- voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0));
- } else {
- image_fetch_rsrc(bld_base, &resource_reg, false, true, target, &args.resource);
- image_fetch_coords(bld_base, inst, 0, args.resource, args.coords);
- vindex = args.coords[0]; /* for buffers only */
- }
-
- if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
- ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
-
- bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
- args.cache_policy = get_cache_policy(ctx, inst,
- false, /* atomic */
- is_image, /* may_store_unaligned */
- writeonly_memory);
-
- if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
- store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask,
- ac_build_gather_values(&ctx->ac, chans, 4),
- voffset, args.cache_policy, writeonly_memory);
- return;
- }
-
- if (target == TGSI_TEXTURE_BUFFER) {
- unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
-
- ac_build_buffer_store_format(&ctx->ac, args.resource,
- ac_build_gather_values(&ctx->ac, chans, num_channels),
- vindex, ctx->i32_0 /* voffset */,
- num_channels,
- args.cache_policy);
- } else {
- args.opcode = ac_image_store;
- args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4);
- args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
- args.attributes = AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY;
- args.dmask = 0xf;
-
- emit_data->output[emit_data->chan] =
- ac_build_image_opcode(&ctx->ac, &args);
- }
-}
-
-static void atomic_emit_memory(struct si_shader_context *ctx,
- struct lp_build_emit_data *emit_data) {
- LLVMBuilderRef builder = ctx->ac.builder;
- const struct tgsi_full_instruction * inst = emit_data->inst;
- LLVMValueRef ptr, result, arg;
- const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
- ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
-
- arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
- arg = ac_to_integer(&ctx->ac, arg);
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
- LLVMValueRef new_data;
- new_data = lp_build_emit_fetch(&ctx->bld_base,
- inst, 3, 0);
-
- new_data = ac_to_integer(&ctx->ac, new_data);
-
- result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data,
- sync_scope);
- result = LLVMBuildExtractValue(builder, result, 0, "");
- } else {
- LLVMAtomicRMWBinOp op;
-
- switch(inst->Instruction.Opcode) {
- case TGSI_OPCODE_ATOMUADD:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case TGSI_OPCODE_ATOMXCHG:
- op = LLVMAtomicRMWBinOpXchg;
- break;
- case TGSI_OPCODE_ATOMAND:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case TGSI_OPCODE_ATOMOR:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case TGSI_OPCODE_ATOMXOR:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case TGSI_OPCODE_ATOMUMIN:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case TGSI_OPCODE_ATOMUMAX:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case TGSI_OPCODE_ATOMIMIN:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case TGSI_OPCODE_ATOMIMAX:
- op = LLVMAtomicRMWBinOpMax;
- break;
- default:
- unreachable("unknown atomic opcode");
- }
-
- result = ac_build_atomic_rmw(&ctx->ac, op, ptr, arg, sync_scope);
- }
- emit_data->output[emit_data->chan] =
- LLVMBuildBitCast(builder, result, ctx->f32, "");
-}
-
-static void atomic_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction * inst = emit_data->inst;
- struct ac_image_args args = {};
- unsigned num_data = 0;
- LLVMValueRef vindex = ctx->i32_0;
- LLVMValueRef voffset = ctx->i32_0;
-
- if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
- atomic_emit_memory(ctx, emit_data);
- return;
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
- /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
- * of arguments, which is reversed relative to TGSI (and GLSL)
- */
- args.data[num_data++] =
- ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0));
- }
-
- args.data[num_data++] =
- ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0));
-
- args.cache_policy = get_cache_policy(ctx, inst, true, false, false);
-
- if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
- args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
- voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0));
- } else {
- image_fetch_rsrc(bld_base, &inst->Src[0], false, true,
- inst->Memory.Texture, &args.resource);
- image_fetch_coords(bld_base, inst, 1, args.resource, args.coords);
- vindex = args.coords[0]; /* for buffers only */
- }
-
- if (inst->Src[0].Register.File != TGSI_FILE_BUFFER &&
- inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
- LLVMValueRef buf_args[7];
- unsigned num_args = 0;
-
- buf_args[num_args++] = args.data[0];
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
- buf_args[num_args++] = args.data[1];
-
- buf_args[num_args++] = args.resource;
- buf_args[num_args++] = vindex;
- buf_args[num_args++] = voffset;
- buf_args[num_args++] = ctx->i32_0; /* soffset */
- buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0);
-
- char intrinsic_name[64];
- snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name);
- emit_data->output[emit_data->chan] =
- ac_to_float(&ctx->ac,
- ac_build_intrinsic(&ctx->ac, intrinsic_name,
- ctx->i32, buf_args, num_args, 0));
- return;
- }
-
- if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
- LLVMValueRef buf_args[7];
- unsigned num_args = 0;
-
- buf_args[num_args++] = args.data[0];
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
- buf_args[num_args++] = args.data[1];
-
- buf_args[num_args++] = args.resource;
- buf_args[num_args++] = vindex;
- buf_args[num_args++] = voffset;
- buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
-
- char intrinsic_name[40];
- snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
- emit_data->output[emit_data->chan] =
- ac_to_float(&ctx->ac,
- ac_build_intrinsic(&ctx->ac, intrinsic_name,
- ctx->i32, buf_args, num_args, 0));
- } else {
- if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
- args.opcode = ac_image_atomic_cmpswap;
- } else {
- args.opcode = ac_image_atomic;
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
- case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
- case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
- case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
- case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
- case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
- case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
- case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
- case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
- case TGSI_OPCODE_ATOMINC_WRAP:
- args.atomic = ac_atomic_inc_wrap;
- break;
- case TGSI_OPCODE_ATOMDEC_WRAP:
- args.atomic = ac_atomic_dec_wrap;
- break;
- default: unreachable("unhandled image atomic");
- }
- }
-
- args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
- emit_data->output[emit_data->chan] =
- ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
- }
-}
-
-static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
- unsigned target, LLVMValueRef out)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
-
- /* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class == GFX9 &&
- (target == TGSI_TEXTURE_1D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
- LLVMValueRef layers =
- LLVMBuildExtractElement(builder, out,
- LLVMConstInt(ctx->i32, 2, 0), "");
- out = LLVMBuildInsertElement(builder, out, layers,
- ctx->i32_1, "");
- }
-
- /* Divide the number of layers by 6 to get the number of cubes. */
- if (target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
-
- LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
- z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), "");
-
- out = LLVMBuildInsertElement(builder, out, z, imm2, "");
- }
- return out;
-}
-
-static void resq_emit(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- const struct tgsi_full_src_register *reg =
- &inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 1 : 0];
-
- if (reg->Register.File == TGSI_FILE_BUFFER) {
- LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false);
-
- emit_data->output[emit_data->chan] =
- LLVMBuildExtractElement(builder, rsrc,
- LLVMConstInt(ctx->i32, 2, 0), "");
- return;
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
- inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- LLVMValueRef rsrc;
-
- tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
- /* Read the size from the buffer descriptor directly. */
- emit_data->output[emit_data->chan] =
- get_buffer_size(bld_base, rsrc);
- return;
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
- inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
- LLVMValueRef rsrc;
-
- image_fetch_rsrc(bld_base, reg, false, false, inst->Memory.Texture, &rsrc);
- emit_data->output[emit_data->chan] =
- get_buffer_size(bld_base, rsrc);
- return;
- }
-
- unsigned target;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
- target = inst->Texture.Texture;
- } else {
- if (inst->Memory.Texture == TGSI_TEXTURE_3D)
- target = TGSI_TEXTURE_2D_ARRAY;
- else
- target = inst->Memory.Texture;
- }
-
- struct ac_image_args args = {};
- args.opcode = ac_image_get_resinfo;
- args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
- args.dmask = 0xf;
- args.attributes = AC_FUNC_ATTR_READNONE;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
- tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL);
- args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
- } else {
- image_fetch_rsrc(bld_base, reg, false, false, target, &args.resource);
- args.lod = ctx->i32_0;
- }
-
- emit_data->output[emit_data->chan] =
- fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args));
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
- (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
- LLVMValueRef samples =
- ac_build_image_get_sample_count(&ctx->ac, args.resource);
-
- emit_data->output[emit_data->chan] =
- LLVMBuildInsertElement(ctx->ac.builder,
- emit_data->output[emit_data->chan],
- samples,
- LLVMConstInt(ctx->i32, 3, 0), "");
- }
-}
-
-/**
- * Load an image view, fmask view. or sampler state descriptor.
- */
-LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type type)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
-
- switch (type) {
- case AC_DESC_IMAGE:
- /* The image is at [0:7]. */
- index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
- break;
- case AC_DESC_BUFFER:
- /* The buffer is in [4:7]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
- ctx->i32_1);
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- break;
- case AC_DESC_FMASK:
- /* The FMASK is at [8:15]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
- ctx->i32_1);
- break;
- case AC_DESC_SAMPLER:
- /* The sampler state is at [12:15]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
- LLVMConstInt(ctx->i32, 3, 0));
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- break;
- case AC_DESC_PLANE_0:
- case AC_DESC_PLANE_1:
- case AC_DESC_PLANE_2:
- /* Only used for the multiplane image support for Vulkan. Should
- * never be reached in radeonsi.
- */
- unreachable("Plane descriptor requested in radeonsi.");
- }
-
- return ac_build_load_to_sgpr(&ctx->ac, list, index);
-}
-
-/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
- *
- * GFX6-GFX7:
- * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
- * filtering manually. The driver sets img7 to a mask clearing
- * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
- * s_and_b32 samp0, samp0, img7
- *
- * GFX8:
- * The ANISO_OVERRIDE sampler field enables this fix in TA.
- */
-static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
- LLVMValueRef res, LLVMValueRef samp)
-{
- LLVMValueRef img7, samp0;
-
- if (ctx->screen->info.chip_class >= GFX8)
- return samp;
-
- img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
- LLVMConstInt(ctx->i32, 7, 0), "");
- samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
- ctx->i32_0, "");
- samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
- return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
- ctx->i32_0, "");
-}
-
-static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data,
- LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
- LLVMValueRef *fmask_ptr)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
- const struct tgsi_full_instruction *inst = emit_data->inst;
- const struct tgsi_full_src_register *reg;
- unsigned target = inst->Texture.Texture;
- unsigned sampler_src;
- LLVMValueRef index;
-
- sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
- reg = &emit_data->inst->Src[sampler_src];
-
- if (reg->Register.Indirect) {
- index = si_get_bounded_indirect_index(ctx,
- &reg->Indirect,
- reg->Register.Index,
- ctx->num_samplers);
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
- } else {
- index = LLVMConstInt(ctx->i32,
- si_get_sampler_slot(reg->Register.Index), 0);
- }
-
- if (reg->Register.File != TGSI_FILE_SAMPLER) {
- /* Bindless descriptors are accessible from a different pair of
- * user SGPR indices.
- */
- list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
- index = lp_build_emit_fetch_src(bld_base, reg,
- TGSI_TYPE_UNSIGNED, 0);
-
- /* Since bindless handle arithmetic can contain an unsigned integer
- * wraparound and si_load_sampler_desc assumes there isn't any,
- * use GEP without "inbounds" (inside ac_build_pointer_add)
- * to prevent incorrect code generation and hangs.
- */
- index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
- list = ac_build_pointer_add(&ctx->ac, list, index);
- index = ctx->i32_0;
- }
-
- if (target == TGSI_TEXTURE_BUFFER)
- *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
- else
- *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
-
- if (samp_ptr)
- *samp_ptr = NULL;
- if (fmask_ptr)
- *fmask_ptr = NULL;
-
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- if (fmask_ptr)
- *fmask_ptr = si_load_sampler_desc(ctx, list, index,
- AC_DESC_FMASK);
- } else if (target != TGSI_TEXTURE_BUFFER) {
- if (samp_ptr) {
- *samp_ptr = si_load_sampler_desc(ctx, list, index,
- AC_DESC_SAMPLER);
- *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
- }
- }
-}
-
-/* Gather4 should follow the same rules as bilinear filtering, but the hardware
- * incorrectly forces nearest filtering if the texture format is integer.
- * The only effect it has on Gather4, which always returns 4 texels for
- * bilinear filtering, is that the final coordinates are off by 0.5 of
- * the texel size.
- *
- * The workaround is to subtract 0.5 from the unnormalized coordinates,
- * or (0.5 / size) from the normalized coordinates.
- *
- * However, cube textures with 8_8_8_8 data formats require a different
- * workaround of overriding the num format to USCALED/SSCALED. This would lose
- * precision in 32-bit data formats, so it needs to be applied dynamically at
- * runtime. In this case, return an i1 value that indicates whether the
- * descriptor was overridden (and hence a fixup of the sampler result is needed).
- */
-static LLVMValueRef
-si_lower_gather4_integer(struct si_shader_context *ctx,
- struct ac_image_args *args,
- unsigned target,
- enum tgsi_return_type return_type)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef wa_8888 = NULL;
- LLVMValueRef half_texel[2];
-
- assert(return_type == TGSI_RETURN_TYPE_SINT ||
- return_type == TGSI_RETURN_TYPE_UINT);
-
- if (target == TGSI_TEXTURE_CUBE ||
- target == TGSI_TEXTURE_CUBE_ARRAY) {
- LLVMValueRef formats;
- LLVMValueRef data_format;
- LLVMValueRef wa_formats;
-
- formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
-
- data_format = LLVMBuildLShr(builder, formats,
- LLVMConstInt(ctx->i32, 20, false), "");
- data_format = LLVMBuildAnd(builder, data_format,
- LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
- wa_8888 = LLVMBuildICmp(
- builder, LLVMIntEQ, data_format,
- LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
- "");
-
- uint32_t wa_num_format =
- return_type == TGSI_RETURN_TYPE_UINT ?
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
- wa_formats = LLVMBuildAnd(builder, formats,
- LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
- "");
- wa_formats = LLVMBuildOr(builder, wa_formats,
- LLVMConstInt(ctx->i32, wa_num_format, false), "");
-
- formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
- args->resource = LLVMBuildInsertElement(
- builder, args->resource, formats, ctx->i32_1, "");
- }
-
- if (target == TGSI_TEXTURE_RECT ||
- target == TGSI_TEXTURE_SHADOWRECT) {
- assert(!wa_8888);
- half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
- } else {
- struct ac_image_args resinfo = {};
- struct lp_build_if_state if_ctx;
-
- if (wa_8888) {
- /* Skip the texture size query entirely if we don't need it. */
- lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
- }
-
- /* Query the texture size. */
- resinfo.opcode = ac_image_get_resinfo;
- resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
- resinfo.resource = args->resource;
- resinfo.sampler = args->sampler;
- resinfo.lod = ctx->ac.i32_0;
- resinfo.dmask = 0xf;
- resinfo.attributes = AC_FUNC_ATTR_READNONE;
-
- LLVMValueRef texsize =
- fix_resinfo(ctx, target,
- ac_build_image_opcode(&ctx->ac, &resinfo));
-
- /* Compute -0.5 / size. */
- for (unsigned c = 0; c < 2; c++) {
- half_texel[c] =
- LLVMBuildExtractElement(builder, texsize,
- LLVMConstInt(ctx->i32, c, 0), "");
- half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
- half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]);
- half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
- LLVMConstReal(ctx->f32, -0.5), "");
- }
-
- if (wa_8888) {
- lp_build_endif(&if_ctx);
-
- LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
- half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
- values, bb);
- }
- }
- }
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef tmp;
- tmp = ac_to_float(&ctx->ac, args->coords[c]);
- tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
- args->coords[c] = ac_to_integer(&ctx->ac, tmp);
- }
-
- return wa_8888;
-}
-
-/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
- * result after the gather operation.
- */
-static LLVMValueRef
-si_fix_gather4_integer_result(struct si_shader_context *ctx,
- LLVMValueRef result,
- enum tgsi_return_type return_type,
- LLVMValueRef wa)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
-
- assert(return_type == TGSI_RETURN_TYPE_SINT ||
- return_type == TGSI_RETURN_TYPE_UINT);
-
- for (unsigned chan = 0; chan < 4; ++chan) {
- LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
- LLVMValueRef value;
- LLVMValueRef wa_value;
-
- value = LLVMBuildExtractElement(builder, result, chanv, "");
-
- if (return_type == TGSI_RETURN_TYPE_UINT)
- wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
- else
- wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
- wa_value = ac_to_float(&ctx->ac, wa_value);
- value = LLVMBuildSelect(builder, wa, wa_value, value, "");
-
- result = LLVMBuildInsertElement(builder, result, value, chanv, "");
- }
-
- return result;
-}
-
-static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
- unsigned target = inst->Texture.Texture;
- struct ac_image_args args = {};
- int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
- unsigned chan;
- bool has_offset = inst->Texture.NumOffsets > 0;
- LLVMValueRef fmask_ptr = NULL;
-
- tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr);
-
- if (target == TGSI_TEXTURE_BUFFER) {
- LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
- unsigned num_channels =
- util_last_bit(inst->Dst[0].Register.WriteMask);
- LLVMValueRef result =
- ac_build_buffer_load_format(&ctx->ac,
- args.resource,
- vindex,
- ctx->i32_0,
- num_channels, 0, true);
- emit_data->output[emit_data->chan] =
- ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
- return;
- }
-
- /* Fetch and project texture coordinates */
- args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W);
- for (chan = 0; chan < 3; chan++) {
- args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
- if (opcode == TGSI_OPCODE_TXP)
- args.coords[chan] = ac_build_fdiv(&ctx->ac,
- args.coords[chan], args.coords[3]);
- }
-
- if (opcode == TGSI_OPCODE_TXP)
- args.coords[3] = ctx->ac.f32_1;
-
- /* Pack offsets. */
- if (has_offset &&
- opcode != TGSI_OPCODE_TXF &&
- opcode != TGSI_OPCODE_TXF_LZ) {
- /* The offsets are six-bit signed integers packed like this:
- * X=[5:0], Y=[13:8], and Z=[21:16].
- */
- LLVMValueRef offset[3], pack;
-
- assert(inst->Texture.NumOffsets == 1);
-
- for (chan = 0; chan < 3; chan++) {
- offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan);
- offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->i32, 0x3f, 0), "");
- if (chan)
- offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->i32, chan*8, 0), "");
- }
-
- pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
- pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
- args.offset = pack;
- }
-
- /* Pack LOD bias value */
- if (opcode == TGSI_OPCODE_TXB)
- args.bias = args.coords[3];
- if (opcode == TGSI_OPCODE_TXB2)
- args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-
- /* Pack depth comparison value */
- if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
- LLVMValueRef z;
-
- if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
- } else {
- assert(ref_pos >= 0);
- z = args.coords[ref_pos];
- }
-
- /* Section 8.23.1 (Depth Texture Comparison Mode) of the
- * OpenGL 4.5 spec says:
- *
- * "If the texture’s internal format indicates a fixed-point
- * depth texture, then D_t and D_ref are clamped to the
- * range [0, 1]; otherwise no clamping is performed."
- *
- * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
- * so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
- * an explicitly clamped 32-bit float format.
- */
- if (ctx->screen->info.chip_class >= GFX8 &&
- ctx->screen->info.chip_class <= GFX9) {
- LLVMValueRef upgraded;
- LLVMValueRef clamped;
- upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
- LLVMConstInt(ctx->i32, 3, false), "");
- upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
- LLVMConstInt(ctx->i32, 29, false), "");
- upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
- clamped = ac_build_clamp(&ctx->ac, z);
- z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
- }
-
- args.compare = z;
- }
-
- /* Pack user derivatives */
- if (opcode == TGSI_OPCODE_TXD) {
- int param, num_src_deriv_channels, num_dst_deriv_channels;
-
- switch (target) {
- case TGSI_TEXTURE_3D:
- num_src_deriv_channels = 3;
- num_dst_deriv_channels = 3;
- break;
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- num_src_deriv_channels = 2;
- num_dst_deriv_channels = 2;
- break;
- case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_CUBE_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- /* Cube derivatives will be converted to 2D. */
- num_src_deriv_channels = 3;
- num_dst_deriv_channels = 3;
- break;
- case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
- case TGSI_TEXTURE_1D_ARRAY:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- num_src_deriv_channels = 1;
-
- /* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class == GFX9) {
- num_dst_deriv_channels = 2;
- } else {
- num_dst_deriv_channels = 1;
- }
- break;
- default:
- unreachable("invalid target");
- }
-
- for (param = 0; param < 2; param++) {
- for (chan = 0; chan < num_src_deriv_channels; chan++)
- args.derivs[param * num_dst_deriv_channels + chan] =
- lp_build_emit_fetch(bld_base, inst, param+1, chan);
-
- /* Fill in the rest with zeros. */
- for (chan = num_src_deriv_channels;
- chan < num_dst_deriv_channels; chan++)
- args.derivs[param * num_dst_deriv_channels + chan] =
- ctx->ac.f32_0;
- }
- }
-
- if (target == TGSI_TEXTURE_CUBE ||
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
- ac_prepare_cube_coords(&ctx->ac,
- opcode == TGSI_OPCODE_TXD,
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
- opcode == TGSI_OPCODE_LODQ,
- args.coords, args.derivs);
- } else if (tgsi_is_array_sampler(target) &&
- opcode != TGSI_OPCODE_TXF &&
- opcode != TGSI_OPCODE_TXF_LZ &&
- ctx->screen->info.chip_class <= GFX8) {
- unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
- args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]);
- }
-
- /* 1D textures are allocated and used as 2D on GFX9. */
- if (ctx->screen->info.chip_class == GFX9) {
- LLVMValueRef filler;
-
- /* Use 0.5, so that we don't sample the border color. */
- if (opcode == TGSI_OPCODE_TXF ||
- opcode == TGSI_OPCODE_TXF_LZ)
- filler = ctx->i32_0;
- else
- filler = LLVMConstReal(ctx->f32, 0.5);
-
- if (target == TGSI_TEXTURE_1D ||
- target == TGSI_TEXTURE_SHADOW1D) {
- args.coords[1] = filler;
- } else if (target == TGSI_TEXTURE_1D_ARRAY ||
- target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
- args.coords[2] = args.coords[1];
- args.coords[1] = filler;
- }
- }
-
- /* Pack LOD or sample index */
- if (opcode == TGSI_OPCODE_TXL)
- args.lod = args.coords[3];
- else if (opcode == TGSI_OPCODE_TXL2)
- args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
- else if (opcode == TGSI_OPCODE_TXF) {
- if (target == TGSI_TEXTURE_2D_MSAA) {
- /* No LOD, but move sample index into the right place. */
- args.coords[2] = args.coords[3];
- } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
- args.lod = args.coords[3];
- }
- }
-
- if ((target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) &&
- !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
- ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
- target == TGSI_TEXTURE_2D_ARRAY_MSAA);
- }
-
- if (opcode == TGSI_OPCODE_TXF ||
- opcode == TGSI_OPCODE_TXF_LZ) {
- /* add tex offsets */
- if (inst->Texture.NumOffsets) {
- const struct tgsi_texture_offset *off = inst->TexOffsets;
-
- assert(inst->Texture.NumOffsets == 1);
-
- switch (target) {
- case TGSI_TEXTURE_3D:
- args.coords[2] =
- LLVMBuildAdd(ctx->ac.builder, args.coords[2],
- ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], "");
- /* fall through */
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- args.coords[1] =
- LLVMBuildAdd(ctx->ac.builder, args.coords[1],
- ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], "");
- /* fall through */
- case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
- case TGSI_TEXTURE_1D_ARRAY:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- args.coords[0] =
- LLVMBuildAdd(ctx->ac.builder, args.coords[0],
- ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], "");
- break;
- /* texture offsets do not apply to other texture targets */
- }
- }
- }
-
- if (opcode == TGSI_OPCODE_TG4) {
- unsigned gather_comp = 0;
-
- /* DMASK was repurposed for GATHER4. 4 components are always
- * returned and DMASK works like a swizzle - it selects
- * the component to fetch. The only valid DMASK values are
- * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- * (red,red,red,red) etc.) The ISA document doesn't mention
- * this.
- */
-
- /* Get the component index from src1.x for Gather4. */
- if (!tgsi_is_shadow_target(target)) {
- LLVMValueRef comp_imm;
- struct tgsi_src_register src1 = inst->Src[1].Register;
-
- assert(src1.File == TGSI_FILE_IMMEDIATE);
-
- comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
- gather_comp = LLVMConstIntGetZExtValue(comp_imm);
- gather_comp = CLAMP(gather_comp, 0, 3);
- }
-
- args.dmask = 1 << gather_comp;
- } else {
- args.dmask = 0xf;
- }
-
- args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
- args.unorm = target == TGSI_TEXTURE_RECT ||
- target == TGSI_TEXTURE_SHADOWRECT;
- args.opcode = ac_image_sample;
-
- switch (opcode) {
- case TGSI_OPCODE_TXF:
- case TGSI_OPCODE_TXF_LZ:
- args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
- target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
- ac_image_load : ac_image_load_mip;
- break;
- case TGSI_OPCODE_LODQ:
- args.opcode = ac_image_get_lod;
- break;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TEX2:
- case TGSI_OPCODE_TXP:
- if (ctx->type != PIPE_SHADER_FRAGMENT)
- args.level_zero = true;
- break;
- case TGSI_OPCODE_TEX_LZ:
- args.level_zero = true;
- break;
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXB2:
- assert(ctx->type == PIPE_SHADER_FRAGMENT);
- break;
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXL2:
- break;
- case TGSI_OPCODE_TXD:
- break;
- case TGSI_OPCODE_TG4:
- args.opcode = ac_image_gather4;
- args.level_zero = true;
- break;
- default:
- assert(0);
- return;
- }
-
- /* The hardware needs special lowering for Gather4 with integer formats. */
- LLVMValueRef gather4_int_result_workaround = NULL;
-
- if (ctx->screen->info.chip_class <= GFX8 &&
- opcode == TGSI_OPCODE_TG4) {
- assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
-
- if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
- inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
- gather4_int_result_workaround =
- si_lower_gather4_integer(ctx, &args, target,
- inst->Texture.ReturnType);
- }
- }
-
- args.attributes = AC_FUNC_ATTR_READNONE;
- LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
-
- if (gather4_int_result_workaround) {
- result = si_fix_gather4_integer_result(ctx, result,
- inst->Texture.ReturnType,
- gather4_int_result_workaround);
- }
-
- emit_data->output[emit_data->chan] = result;
-}
-
-static void si_llvm_emit_txqs(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef rsrc;
-
- tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
-
- rsrc = LLVMBuildBitCast(ctx->ac.builder, rsrc, ctx->v8i32, "");
- emit_data->output[emit_data->chan] =
- ac_build_image_get_sample_count(&ctx->ac, rsrc);
-}
-
-static LLVMValueRef si_llvm_emit_fbfetch(struct si_shader_context *ctx)
-{
- struct ac_image_args args = {};
- LLVMValueRef ptr, image, fmask;
-
- /* Ignore src0, because KHR_blend_func_extended disallows multiple render
- * targets.
- */
-
- /* Load the image descriptor. */
- STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
- ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
- ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
- ac_array_in_const32_addr_space(ctx->v8i32), "");
- image = ac_build_load_to_sgpr(&ctx->ac, ptr,
- LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
-
- unsigned chan = 0;
-
- args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
-
- if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
- args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
-
- /* Get the current render target layer index. */
- if (ctx->shader->key.mono.u.ps.fbfetch_layered)
- args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
-
- if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
- args.coords[chan++] = si_get_sample_id(ctx);
-
- if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
- !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
- fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
- LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
-
- ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
- ctx->shader->key.mono.u.ps.fbfetch_layered);
- }
-
- args.opcode = ac_image_load;
- args.resource = image;
- args.dmask = 0xf;
- args.attributes = AC_FUNC_ATTR_READNONE;
-
- if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
- args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
- ac_image_2darraymsaa : ac_image_2dmsaa;
- else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
- args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
- ac_image_1darray : ac_image_1d;
- else
- args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
- ac_image_2darray : ac_image_2d;
-
- return ac_build_image_opcode(&ctx->ac, &args);
-}
-
-static void si_tgsi_emit_fbfetch(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- emit_data->output[emit_data->chan] = si_llvm_emit_fbfetch(ctx);
-}
-
-LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
- return si_llvm_emit_fbfetch(ctx);
-}
-
-/**
- * Setup actions for TGSI memory opcode, including texture opcodes.
- */
-void si_shader_context_init_mem(struct si_shader_context *ctx)
-{
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
- bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit;
- bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic;
- bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
-
- bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_tgsi_emit_fbfetch;
-
- bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
- bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
- bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
-
- bld_base->op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
- bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
- bld_base->op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
- bld_base->op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
- bld_base->op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
- bld_base->op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
- bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
- bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
- bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
- bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
- bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].intr_name = "inc";
- bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].emit = atomic_emit;
- bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].intr_name = "dec";
-}
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
deleted file mode 100644
index 1443432d593..00000000000
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ /dev/null
@@ -1,1165 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "si_shader_internal.h"
-#include "si_pipe.h"
-#include "ac_llvm_util.h"
-#include "util/u_memory.h"
-
-struct si_llvm_diagnostics {
- struct pipe_debug_callback *debug;
- unsigned retval;
-};
-
-static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
-{
- struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
- LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
- const char *severity_str = NULL;
-
- switch (severity) {
- case LLVMDSError:
- severity_str = "error";
- break;
- case LLVMDSWarning:
- severity_str = "warning";
- break;
- case LLVMDSRemark:
- case LLVMDSNote:
- default:
- return;
- }
-
- char *description = LLVMGetDiagInfoDescription(di);
-
- pipe_debug_message(diag->debug, SHADER_INFO,
- "LLVM diagnostic (%s): %s", severity_str, description);
-
- if (severity == LLVMDSError) {
- diag->retval = 1;
- fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
- }
-
- LLVMDisposeMessage(description);
-}
-
-/**
- * Compile an LLVM module to machine code.
- *
- * @returns 0 for success, 1 for failure
- */
-unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
- struct ac_llvm_compiler *compiler,
- struct pipe_debug_callback *debug,
- bool less_optimized, unsigned wave_size)
-{
- struct ac_compiler_passes *passes = compiler->passes;
-
- if (wave_size == 32)
- passes = compiler->passes_wave32;
- else if (less_optimized && compiler->low_opt_passes)
- passes = compiler->low_opt_passes;
-
- struct si_llvm_diagnostics diag;
- LLVMContextRef llvm_ctx;
-
- diag.debug = debug;
- diag.retval = 0;
-
- /* Setup Diagnostic Handler*/
- llvm_ctx = LLVMGetModuleContext(M);
-
- LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
-
- /* Compile IR. */
- if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
- &binary->elf_size))
- diag.retval = 1;
-
- if (diag.retval != 0)
- pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
- return diag.retval;
-}
-
-void si_shader_binary_clean(struct si_shader_binary *binary)
-{
- free((void *)binary->elf_buffer);
- binary->elf_buffer = NULL;
-
- free(binary->llvm_ir_string);
- binary->llvm_ir_string = NULL;
-}
-
-LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- switch (type) {
- case TGSI_TYPE_UNSIGNED:
- case TGSI_TYPE_SIGNED:
- return ctx->ac.i32;
- case TGSI_TYPE_UNSIGNED64:
- case TGSI_TYPE_SIGNED64:
- return ctx->ac.i64;
- case TGSI_TYPE_DOUBLE:
- return ctx->ac.f64;
- case TGSI_TYPE_UNTYPED:
- case TGSI_TYPE_FLOAT:
- return ctx->ac.f32;
- default: break;
- }
- return 0;
-}
-
-LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type, LLVMValueRef value)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
-
- if (dst_type)
- return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
- else
- return value;
-}
-
-/**
- * Return a value that is equal to the given i32 \p index if it lies in [0,num)
- * or an undefined value in the same interval otherwise.
- */
-LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
- LLVMValueRef index,
- unsigned num)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
- LLVMValueRef cc;
-
- if (util_is_power_of_two_or_zero(num)) {
- index = LLVMBuildAnd(builder, index, c_max, "");
- } else {
- /* In theory, this MAX pattern should result in code that is
- * as good as the bit-wise AND above.
- *
- * In practice, LLVM generates worse code (at the time of
- * writing), because its value tracking is not strong enough.
- */
- cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
- index = LLVMBuildSelect(builder, cc, index, c_max, "");
- }
-
- return index;
-}
-
-static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef value,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef swizzles[4];
-
- swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
- swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
- swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
- swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
-
- return LLVMBuildShuffleVector(ctx->ac.builder,
- value,
- LLVMGetUndef(LLVMTypeOf(value)),
- LLVMConstVector(swizzles, 4), "");
-}
-
-/**
- * Return the description of the array covering the given temporary register
- * index.
- */
-static unsigned
-get_temp_array_id(struct lp_build_tgsi_context *bld_base,
- unsigned reg_index,
- const struct tgsi_ind_register *reg)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
- unsigned i;
-
- if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
- return reg->ArrayID;
-
- for (i = 0; i < num_arrays; i++) {
- const struct tgsi_array_info *array = &ctx->temp_arrays[i];
-
- if (reg_index >= array->range.First && reg_index <= array->range.Last)
- return i + 1;
- }
-
- return 0;
-}
-
-static struct tgsi_declaration_range
-get_array_range(struct lp_build_tgsi_context *bld_base,
- unsigned File, unsigned reg_index,
- const struct tgsi_ind_register *reg)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct tgsi_declaration_range range;
-
- if (File == TGSI_FILE_TEMPORARY) {
- unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
- if (array_id)
- return ctx->temp_arrays[array_id - 1].range;
- }
-
- range.First = 0;
- range.Last = bld_base->info->file_max[File];
- return range;
-}
-
-/**
- * For indirect registers, construct a pointer directly to the requested
- * element using getelementptr if possible.
- *
- * Returns NULL if the insertelement/extractelement fallback for array access
- * must be used.
- */
-static LLVMValueRef
-get_pointer_into_array(struct si_shader_context *ctx,
- unsigned file,
- unsigned swizzle,
- unsigned reg_index,
- const struct tgsi_ind_register *reg_indirect)
-{
- unsigned array_id;
- struct tgsi_array_info *array;
- LLVMValueRef idxs[2];
- LLVMValueRef index;
- LLVMValueRef alloca;
-
- if (file != TGSI_FILE_TEMPORARY)
- return NULL;
-
- array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
- if (!array_id)
- return NULL;
-
- alloca = ctx->temp_array_allocas[array_id - 1];
- if (!alloca)
- return NULL;
-
- array = &ctx->temp_arrays[array_id - 1];
-
- if (!(array->writemask & (1 << swizzle)))
- return ctx->undef_alloca;
-
- index = si_get_indirect_index(ctx, reg_indirect, 1,
- reg_index - ctx->temp_arrays[array_id - 1].range.First);
-
- /* Ensure that the index is within a valid range, to guard against
- * VM faults and overwriting critical data (e.g. spilled resource
- * descriptors).
- *
- * TODO It should be possible to avoid the additional instructions
- * if LLVM is changed so that it guarantuees:
- * 1. the scratch space descriptor isolates the current wave (this
- * could even save the scratch offset SGPR at the cost of an
- * additional SALU instruction)
- * 2. the memory for allocas must be allocated at the _end_ of the
- * scratch space (after spilled registers)
- */
- index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
-
- index = ac_build_imad(&ctx->ac, index,
- LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
- LLVMConstInt(ctx->i32,
- util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
- idxs[0] = ctx->i32_0;
- idxs[1] = index;
- return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
-}
-
-LLVMValueRef
-si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
- LLVMTypeRef type,
- LLVMValueRef ptr,
- LLVMValueRef ptr2)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef values[2] = {
- ac_to_integer(&ctx->ac, ptr),
- ac_to_integer(&ctx->ac, ptr2),
- };
- LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
- return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
-}
-
-static LLVMValueRef
-emit_array_fetch(struct lp_build_tgsi_context *bld_base,
- unsigned File, enum tgsi_opcode_type type,
- struct tgsi_declaration_range range,
- unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned i, size = range.Last - range.First + 1;
- LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
- LLVMValueRef result = LLVMGetUndef(vec);
- unsigned swizzle = swizzle_in;
- struct tgsi_full_src_register tmp_reg = {};
- tmp_reg.Register.File = File;
- if (tgsi_type_is_64bit(type))
- swizzle |= (swizzle_in + 1) << 16;
-
- for (i = 0; i < size; ++i) {
- tmp_reg.Register.Index = i + range.First;
-
- LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
- result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
- LLVMConstInt(ctx->i32, i, 0), "array_vector");
- }
- return result;
-}
-
-static LLVMValueRef
-load_value_from_array(struct lp_build_tgsi_context *bld_base,
- unsigned file,
- enum tgsi_opcode_type type,
- unsigned swizzle,
- unsigned reg_index,
- const struct tgsi_ind_register *reg_indirect)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef ptr;
-
- ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
- if (ptr) {
- LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
- if (tgsi_type_is_64bit(type)) {
- LLVMValueRef ptr_hi, val_hi;
- ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
- val_hi = LLVMBuildLoad(builder, ptr_hi, "");
- val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- val, val_hi);
- }
-
- return val;
- } else {
- struct tgsi_declaration_range range =
- get_array_range(bld_base, file, reg_index, reg_indirect);
- LLVMValueRef index =
- si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
- LLVMValueRef array =
- emit_array_fetch(bld_base, file, type, range, swizzle);
- return LLVMBuildExtractElement(builder, array, index, "");
- }
-}
-
-static void
-store_value_to_array(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef value,
- unsigned file,
- unsigned chan_index,
- unsigned reg_index,
- const struct tgsi_ind_register *reg_indirect)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef ptr;
-
- ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
- if (ptr) {
- LLVMBuildStore(builder, value, ptr);
- } else {
- unsigned i, size;
- struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
- LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
- LLVMValueRef array =
- emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
- LLVMValueRef temp_ptr;
-
- array = LLVMBuildInsertElement(builder, array, value, index, "");
-
- size = range.Last - range.First + 1;
- for (i = 0; i < size; ++i) {
- switch(file) {
- case TGSI_FILE_OUTPUT:
- temp_ptr = ctx->outputs[i + range.First][chan_index];
- break;
-
- case TGSI_FILE_TEMPORARY:
- if (range.First + i >= ctx->temps_count)
- continue;
- temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
- break;
-
- default:
- continue;
- }
- value = LLVMBuildExtractElement(builder, array,
- LLVMConstInt(ctx->i32, i, 0), "");
- LLVMBuildStore(builder, value, temp_ptr);
- }
- }
-}
-
-/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
- * reload them at each use. This must be true if the shader is using
- * derivatives and KILL, because KILL can leave the WQM and then a lazy
- * input load isn't in the WQM anymore.
- */
-static bool si_preload_fs_inputs(struct si_shader_context *ctx)
-{
- struct si_shader_selector *sel = ctx->shader->selector;
-
- return sel->info.uses_derivatives &&
- sel->info.uses_kill;
-}
-
-static LLVMValueRef
-get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
- unsigned chan)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
- return ctx->outputs[index][chan];
-}
-
-LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef result = NULL, ptr, ptr2;
- unsigned swizzle = swizzle_in & 0xffff;
-
- if (swizzle_in == ~0) {
- LLVMValueRef values[TGSI_NUM_CHANNELS];
- unsigned chan;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
- }
- return ac_build_gather_values(&ctx->ac, values,
- TGSI_NUM_CHANNELS);
- }
-
- if (reg->Register.Indirect) {
- LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
- swizzle, reg->Register.Index, &reg->Indirect);
- return bitcast(bld_base, type, load);
- }
-
- switch(reg->Register.File) {
- case TGSI_FILE_IMMEDIATE: {
- LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
- if (tgsi_type_is_64bit(type)) {
- result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
- result = LLVMConstInsertElement(result,
- ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
- ctx->i32_0);
- result = LLVMConstInsertElement(result,
- ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
- ctx->i32_1);
- return LLVMConstBitCast(result, ctype);
- } else {
- return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
- }
- }
-
- case TGSI_FILE_INPUT: {
- unsigned index = reg->Register.Index;
- LLVMValueRef input[4];
-
- /* I don't think doing this for vertex shaders is beneficial.
- * For those, we want to make sure the VMEM loads are executed
- * only once. Fragment shaders don't care much, because
- * v_interp instructions are much cheaper than VMEM loads.
- */
- if (!si_preload_fs_inputs(ctx) &&
- ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
- ctx->load_input(ctx, index, &ctx->input_decls[index], input);
- else
- memcpy(input, &ctx->inputs[index * 4], sizeof(input));
-
- result = input[swizzle];
-
- if (tgsi_type_is_64bit(type)) {
- ptr = result;
- ptr2 = input[swizzle_in >> 16];
- return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- ptr, ptr2);
- }
- break;
- }
-
- case TGSI_FILE_TEMPORARY:
- if (reg->Register.Index >= ctx->temps_count)
- return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
- ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
- if (tgsi_type_is_64bit(type)) {
- ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
- return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- LLVMBuildLoad(builder, ptr, ""),
- LLVMBuildLoad(builder, ptr2, ""));
- }
- result = LLVMBuildLoad(builder, ptr, "");
- break;
-
- case TGSI_FILE_OUTPUT:
- ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
- if (tgsi_type_is_64bit(type)) {
- ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
- return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- LLVMBuildLoad(builder, ptr, ""),
- LLVMBuildLoad(builder, ptr2, ""));
- }
- result = LLVMBuildLoad(builder, ptr, "");
- break;
-
- default:
- return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
- }
-
- return bitcast(bld_base, type, result);
-}
-
-static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle_in)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef cval = ctx->system_values[reg->Register.Index];
- unsigned swizzle = swizzle_in & 0xffff;
-
- if (tgsi_type_is_64bit(type)) {
- LLVMValueRef lo, hi;
-
- assert(swizzle == 0 || swizzle == 2);
-
- lo = LLVMBuildExtractElement(
- builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
- hi = LLVMBuildExtractElement(
- builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
-
- return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
- lo, hi);
- }
-
- if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
- cval = LLVMBuildExtractElement(
- builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
- } else {
- assert(swizzle == 0);
- }
-
- return bitcast(bld_base, type, cval);
-}
-
-static void emit_declaration(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_declaration *decl)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMBuilderRef builder = ctx->ac.builder;
- unsigned first, last, i;
- switch(decl->Declaration.File) {
- case TGSI_FILE_ADDRESS:
- {
- unsigned idx;
- for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- unsigned chan;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- ctx->addrs[idx][chan] = ac_build_alloca_undef(
- &ctx->ac, ctx->i32, "");
- }
- }
- break;
- }
-
- case TGSI_FILE_TEMPORARY:
- {
- char name[18] = "";
- LLVMValueRef array_alloca = NULL;
- unsigned decl_size;
- unsigned writemask = decl->Declaration.UsageMask;
- first = decl->Range.First;
- last = decl->Range.Last;
- decl_size = 4 * ((last - first) + 1);
-
- if (decl->Declaration.Array) {
- unsigned id = decl->Array.ArrayID - 1;
- unsigned array_size;
-
- writemask &= ctx->temp_arrays[id].writemask;
- ctx->temp_arrays[id].writemask = writemask;
- array_size = ((last - first) + 1) * util_bitcount(writemask);
-
- /* If the array has more than 16 elements, store it
- * in memory using an alloca that spans the entire
- * array.
- *
- * Otherwise, store each array element individually.
- * We will then generate vectors (per-channel, up to
- * <16 x float> if the usagemask is a single bit) for
- * indirect addressing.
- *
- * Note that 16 is the number of vector elements that
- * LLVM will store in a register, so theoretically an
- * array with up to 4 * 16 = 64 elements could be
- * handled this way, but whether that's a good idea
- * depends on VGPR register pressure elsewhere.
- *
- * FIXME: We shouldn't need to have the non-alloca
- * code path for arrays. LLVM should be smart enough to
- * promote allocas into registers when profitable.
- */
- if (array_size > 16 ||
- !ctx->screen->llvm_has_working_vgpr_indexing) {
- array_alloca = ac_build_alloca_undef(&ctx->ac,
- LLVMArrayType(ctx->f32,
- array_size), "array");
- ctx->temp_array_allocas[id] = array_alloca;
- }
- }
-
- if (!ctx->temps_count) {
- ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
- }
- if (!array_alloca) {
- for (i = 0; i < decl_size; ++i) {
-#ifndef NDEBUG
- snprintf(name, sizeof(name), "TEMP%d.%c",
- first + i / 4, "xyzw"[i % 4]);
-#endif
- ctx->temps[first * TGSI_NUM_CHANNELS + i] =
- ac_build_alloca_undef(&ctx->ac,
- ctx->f32,
- name);
- }
- } else {
- LLVMValueRef idxs[2] = {
- ctx->i32_0,
- NULL
- };
- unsigned j = 0;
-
- if (writemask != TGSI_WRITEMASK_XYZW &&
- !ctx->undef_alloca) {
- /* Create a dummy alloca. We use it so that we
- * have a pointer that is safe to load from if
- * a shader ever reads from a channel that
- * it never writes to.
- */
- ctx->undef_alloca = ac_build_alloca_undef(
- &ctx->ac, ctx->f32, "undef");
- }
-
- for (i = 0; i < decl_size; ++i) {
- LLVMValueRef ptr;
- if (writemask & (1 << (i % 4))) {
-#ifndef NDEBUG
- snprintf(name, sizeof(name), "TEMP%d.%c",
- first + i / 4, "xyzw"[i % 4]);
-#endif
- idxs[1] = LLVMConstInt(ctx->i32, j, 0);
- ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
- j++;
- } else {
- ptr = ctx->undef_alloca;
- }
- ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
- }
- }
- break;
- }
- case TGSI_FILE_INPUT:
- {
- unsigned idx;
- for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- if (ctx->load_input &&
- ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
- ctx->input_decls[idx] = *decl;
- ctx->input_decls[idx].Range.First = idx;
- ctx->input_decls[idx].Range.Last = idx;
- ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
-
- if (si_preload_fs_inputs(ctx) ||
- bld_base->info->processor != PIPE_SHADER_FRAGMENT)
- ctx->load_input(ctx, idx, &ctx->input_decls[idx],
- &ctx->inputs[idx * 4]);
- }
- }
- }
- break;
-
- case TGSI_FILE_SYSTEM_VALUE:
- {
- unsigned idx;
- for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- si_load_system_value(ctx, idx, decl);
- }
- }
- break;
-
- case TGSI_FILE_OUTPUT:
- {
- char name[16] = "";
- unsigned idx;
- for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- unsigned chan;
- assert(idx < RADEON_LLVM_MAX_OUTPUTS);
- if (ctx->outputs[idx][0])
- continue;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-#ifndef NDEBUG
- snprintf(name, sizeof(name), "OUT%d.%c",
- idx, "xyzw"[chan % 4]);
-#endif
- ctx->outputs[idx][chan] = ac_build_alloca_undef(
- &ctx->ac, ctx->f32, name);
- }
- }
- break;
- }
-
- case TGSI_FILE_MEMORY:
- si_tgsi_declare_compute_memory(ctx, decl);
- break;
-
- default:
- break;
- }
-}
-
-void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_instruction *inst,
- const struct tgsi_opcode_info *info,
- unsigned index,
- LLVMValueRef dst[4])
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- const struct tgsi_full_dst_register *reg = &inst->Dst[index];
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef temp_ptr, temp_ptr2 = NULL;
- bool is_vec_store = false;
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
-
- if (dst[0]) {
- LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
- is_vec_store = (k == LLVMVectorTypeKind);
- }
-
- if (is_vec_store) {
- LLVMValueRef values[4] = {};
- uint32_t writemask = reg->Register.WriteMask;
- while (writemask) {
- unsigned chan = u_bit_scan(&writemask);
- LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- dst[0], index, "");
- }
- bld_base->emit_store(bld_base, inst, info, index, values);
- return;
- }
-
- uint32_t writemask = reg->Register.WriteMask;
- while (writemask) {
- unsigned chan_index = u_bit_scan(&writemask);
- LLVMValueRef value = dst[chan_index];
-
- if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
- continue;
- if (inst->Instruction.Saturate)
- value = ac_build_clamp(&ctx->ac, value);
-
- if (reg->Register.File == TGSI_FILE_ADDRESS) {
- temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
- LLVMBuildStore(builder, value, temp_ptr);
- continue;
- }
-
- if (!tgsi_type_is_64bit(dtype))
- value = ac_to_float(&ctx->ac, value);
-
- if (reg->Register.Indirect) {
- unsigned file = reg->Register.File;
- unsigned reg_index = reg->Register.Index;
- store_value_to_array(bld_base, value, file, chan_index,
- reg_index, &reg->Indirect);
- } else {
- switch(reg->Register.File) {
- case TGSI_FILE_OUTPUT:
- temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
- if (tgsi_type_is_64bit(dtype))
- temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
- break;
-
- case TGSI_FILE_TEMPORARY:
- {
- if (reg->Register.Index >= ctx->temps_count)
- continue;
-
- temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
- if (tgsi_type_is_64bit(dtype))
- temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
-
- break;
- }
- default:
- return;
- }
- if (!tgsi_type_is_64bit(dtype))
- LLVMBuildStore(builder, value, temp_ptr);
- else {
- LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
- LLVMVectorType(ctx->i32, 2), "");
- LLVMValueRef val2;
- value = LLVMBuildExtractElement(builder, ptr,
- ctx->i32_0, "");
- val2 = LLVMBuildExtractElement(builder, ptr,
- ctx->i32_1, "");
-
- LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
- LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
- }
- }
- }
-}
-
-static int get_line(int pc)
-{
- /* Subtract 1 so that the number shown is that of the corresponding
- * opcode in the TGSI dump, e.g. an if block has the same suffix as
- * the instruction number of the corresponding TGSI IF.
- */
- return pc - 1;
-}
-
-static void bgnloop_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void brk_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_break(&ctx->ac);
-}
-
-static void cont_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_continue(&ctx->ac);
-}
-
-static void else_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_else(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endif_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_endif(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void endloop_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
-}
-
-static void if_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void uif_emit(const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
-{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
-}
-
-static void emit_immediate(struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_immediate *imm)
-{
- unsigned i;
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- for (i = 0; i < 4; ++i) {
- ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
- LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
- }
-
- ctx->imms_num++;
-}
-
-void si_llvm_context_init(struct si_shader_context *ctx,
- struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- unsigned wave_size,
- unsigned ballot_mask_bits)
-{
- struct lp_type type;
-
- /* Initialize the gallivm object:
- * We are only using the module, context, and builder fields of this struct.
- * This should be enough for us to be able to pass our gallivm struct to the
- * helper functions in the gallivm module.
- */
- memset(ctx, 0, sizeof(*ctx));
- ctx->screen = sscreen;
- ctx->compiler = compiler;
-
- ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
- sscreen->info.family,
- AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
- wave_size, ballot_mask_bits);
-
- ctx->gallivm.context = ctx->ac.context;
- ctx->gallivm.module = ctx->ac.module;
- ctx->gallivm.builder = ctx->ac.builder;
-
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-
- type.floating = true;
- type.fixed = false;
- type.sign = true;
- type.norm = false;
- type.width = 32;
- type.length = 1;
-
- lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
- lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
- lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
- type.width *= 2;
- lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
- lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
- lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
-
- bld_base->soa = 1;
- bld_base->emit_swizzle = emit_swizzle;
- bld_base->emit_declaration = emit_declaration;
- bld_base->emit_immediate = emit_immediate;
-
- bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
- bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
- bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
- bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
- bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
- bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
- bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
- bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
-
- si_shader_context_init_alu(ctx);
- si_shader_context_init_mem(ctx);
-
- ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
- ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
- ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
- ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
- ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
- ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
- ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
- ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
- ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
- ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
- ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
-
- ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
- ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
- ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
- ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
-}
-
-/* Set the context to a certain TGSI shader. Can be called repeatedly
- * to change the shader. */
-void si_llvm_context_set_ir(struct si_shader_context *ctx,
- struct si_shader *shader,
- struct nir_shader *nir)
-{
- struct si_shader_selector *sel = shader->selector;
- const struct tgsi_shader_info *info = &sel->info;
-
- ctx->shader = shader;
- ctx->type = sel->type;
- ctx->bld_base.info = info;
-
- /* Clean up the old contents. */
- FREE(ctx->temp_arrays);
- ctx->temp_arrays = NULL;
- FREE(ctx->temp_array_allocas);
- ctx->temp_array_allocas = NULL;
-
- FREE(ctx->imms);
- ctx->imms = NULL;
- ctx->imms_num = 0;
-
- FREE(ctx->temps);
- ctx->temps = NULL;
- ctx->temps_count = 0;
-
- ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
- ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
-
- ctx->num_samplers = util_last_bit(info->samplers_declared);
- ctx->num_images = util_last_bit(info->images_declared);
-
- if (nir)
- return;
-
- if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
- int size = info->array_max[TGSI_FILE_TEMPORARY];
-
- ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
- ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
-
- tgsi_scan_arrays(sel->tokens, TGSI_FILE_TEMPORARY, size,
- ctx->temp_arrays);
- }
- if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
- int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
- ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
- }
-
- /* Re-set these to start with a clean slate. */
- ctx->bld_base.num_instructions = 0;
- ctx->bld_base.pc = 0;
- memset(ctx->input_decls, 0, sizeof(ctx->input_decls));
- memset(ctx->inputs, 0, sizeof(ctx->inputs));
- memset(ctx->outputs, 0, sizeof(ctx->outputs));
-
- ctx->bld_base.emit_store = si_llvm_emit_store;
- ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
- ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
- ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
- ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
- ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
-}
-
-void si_llvm_create_func(struct si_shader_context *ctx,
- const char *name,
- LLVMTypeRef *return_types, unsigned num_return_elems)
-{
- LLVMTypeRef ret_type;
- enum ac_llvm_calling_convention call_conv;
- enum pipe_shader_type real_shader_type;
-
- if (num_return_elems)
- ret_type = LLVMStructTypeInContext(ctx->ac.context,
- return_types,
- num_return_elems, true);
- else
- ret_type = ctx->voidt;
-
- real_shader_type = ctx->type;
-
- /* LS is merged into HS (TCS), and ES is merged into GS. */
- if (ctx->screen->info.chip_class >= GFX9) {
- if (ctx->shader->key.as_ls)
- real_shader_type = PIPE_SHADER_TESS_CTRL;
- else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
- real_shader_type = PIPE_SHADER_GEOMETRY;
- }
-
- switch (real_shader_type) {
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_TESS_EVAL:
- call_conv = AC_LLVM_AMDGPU_VS;
- break;
- case PIPE_SHADER_TESS_CTRL:
- call_conv = AC_LLVM_AMDGPU_HS;
- break;
- case PIPE_SHADER_GEOMETRY:
- call_conv = AC_LLVM_AMDGPU_GS;
- break;
- case PIPE_SHADER_FRAGMENT:
- call_conv = AC_LLVM_AMDGPU_PS;
- break;
- case PIPE_SHADER_COMPUTE:
- call_conv = AC_LLVM_AMDGPU_CS;
- break;
- default:
- unreachable("Unhandle shader type");
- }
-
- /* Setup the function */
- ctx->return_type = ret_type;
- ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
- ret_type, ctx->gallivm.module);
-}
-
-void si_llvm_optimize_module(struct si_shader_context *ctx)
-{
- /* Dump LLVM IR before any optimization passes */
- if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
- si_can_dump_shader(ctx->screen, ctx->type))
- LLVMDumpModule(ctx->gallivm.module);
-
- /* Run the pass */
- LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
- LLVMDisposeBuilder(ctx->ac.builder);
-}
-
-void si_llvm_dispose(struct si_shader_context *ctx)
-{
- LLVMDisposeModule(ctx->gallivm.module);
- LLVMContextDispose(ctx->gallivm.context);
- FREE(ctx->temp_arrays);
- ctx->temp_arrays = NULL;
- FREE(ctx->temp_array_allocas);
- ctx->temp_array_allocas = NULL;
- FREE(ctx->temps);
- ctx->temps = NULL;
- ctx->temps_count = 0;
- FREE(ctx->imms);
- ctx->imms = NULL;
- ctx->imms_num = 0;
- ac_llvm_context_dispose(&ctx->ac);
-}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 941a397525e..bf4a22de13b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -27,7 +27,6 @@
#include "compiler/nir/nir_serialize.h"
#include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
#include "util/hash_table.h"
#include "util/crc32.h"
#include "util/u_async_debug.h"
@@ -51,11 +50,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
unsigned ir_size;
void *ir_binary;
- if (sel->tokens) {
- ir_binary = sel->tokens;
- ir_size = tgsi_num_tokens(sel->tokens) *
- sizeof(struct tgsi_token);
- } else if (sel->nir_binary) {
+ if (sel->nir_binary) {
ir_binary = sel->nir_binary;
ir_size = sel->nir_size;
} else {
@@ -2153,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
main_part->key.as_ngg = key->as_ngg;
main_part->is_monolithic = false;
- if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
+ if (si_compile_shader(sscreen, compiler_state->compiler,
main_part, &compiler_state->debug) != 0) {
FREE(main_part);
return false;
@@ -2516,7 +2511,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
sel->type == PIPE_SHADER_GEOMETRY))
shader->key.as_ngg = 1;
- if (sel->tokens || sel->nir) {
+ if (sel->nir) {
si_get_ir_cache_key(sel, shader->key.as_ngg,
shader->key.as_es, ir_sha1_cache_key);
}
@@ -2531,7 +2526,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
simple_mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, compiler, shader,
+ if (si_compile_shader(sscreen, compiler, shader,
debug) != 0) {
FREE(shader);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
@@ -2695,44 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->so = state->stream_output;
- if (state->type == PIPE_SHADER_IR_TGSI &&
- !sscreen->options.enable_nir) {
- sel->tokens = tgsi_dup_tokens(state->tokens);
- if (!sel->tokens) {
- FREE(sel);
- return NULL;
- }
-
- tgsi_scan_shader(state->tokens, &sel->info);
- tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
- /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
- if (sel->info.uses_persp_opcode_interp_centroid)
- sel->info.uses_persp_centroid = true;
-
- if (sel->info.uses_linear_opcode_interp_centroid)
- sel->info.uses_linear_centroid = true;
-
- if (sel->info.uses_persp_opcode_interp_offset ||
- sel->info.uses_persp_opcode_interp_sample)
- sel->info.uses_persp_center = true;
-
- if (sel->info.uses_linear_opcode_interp_offset ||
- sel->info.uses_linear_opcode_interp_sample)
- sel->info.uses_linear_center = true;
+ if (state->type == PIPE_SHADER_IR_TGSI) {
+ sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
} else {
- if (state->type == PIPE_SHADER_IR_TGSI) {
- sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
- } else {
- assert(state->type == PIPE_SHADER_IR_NIR);
- sel->nir = state->ir.nir;
- }
-
- si_nir_scan_shader(sel->nir, &sel->info);
- si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
- si_nir_adjust_driver_locations(sel->nir);
+ assert(state->type == PIPE_SHADER_IR_NIR);
+ sel->nir = state->ir.nir;
}
+ si_nir_scan_shader(sel->nir, &sel->info);
+ si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+ si_nir_adjust_driver_locations(sel->nir);
+
sel->type = sel->info.processor;
p_atomic_inc(&sscreen->num_shaders_created);
si_get_active_slot_masks(&sel->info,
@@ -3304,7 +3272,6 @@ void si_destroy_shader_selector(struct si_context *sctx,
util_queue_fence_destroy(&sel->ready);
simple_mtx_destroy(&sel->mutex);
- free(sel->tokens);
ralloc_free(sel->nir);
free(sel->nir_binary);
free(sel);
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index ffec8770823..e5f333942b7 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -552,12 +552,6 @@ TODO: document the other workarounds.
<application name="Rocket League" executable="RocketLeague">
<option name="radeonsi_zerovram" value="true" />
</application>
- <application name="Civilization 6" executable="Civ6">
- <option name="radeonsi_enable_nir" value="true"/>
- </application>
- <application name="Civilization 6" executable="Civ6Sub">
- <option name="radeonsi_enable_nir" value="true"/>
- </application>
<application name="DiRT Rally" executable="DirtRally">
<option name="radeonsi_prim_restart_tri_strips_only" value="true"/>
</application>