summary | refs | log | tree | commit | diff
path: root/src/panfrost/midgard
diff options
context:
space:
mode:
author: Icecream95 <ixn@disroot.org>, 2021-01-01 01:39:49 +1300
committer: Marge Bot <eric+marge@anholt.net>, 2021-01-01 02:58:49 +0000
commit: 152bc5d15e1a3a6685dfd6bf955c9b4d66eaacb8 (patch)
tree: 1bc0452d6686436525a13286c13baa89fdd3ce14 /src/panfrost/midgard
parent: 684e5aa5b8b59814555103384225dcfad159e606 (diff)
pan/mdg: Support loads and stores to scratch memory
Similar to shared memory load/store, except giving a different memory type to the hardware. Add nir->scratch_size to ctx->tls_size to allocate the memory.

Tested with the Piglit OpenCL test i32-stack-array.cl.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8264>
Diffstat (limited to 'src/panfrost/midgard')
-rw-r--r-- src/panfrost/midgard/midgard_compile.c | 10
-rw-r--r-- src/panfrost/midgard/midgard_ra.c | 2
2 files changed, 9 insertions, 3 deletions
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 6e325897c22..09230fa2f26 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -1502,6 +1502,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
case nir_intrinsic_load_shared:
+ case nir_intrinsic_load_scratch:
case nir_intrinsic_load_input:
case nir_intrinsic_load_kernel_input:
case nir_intrinsic_load_interpolated_input: {
@@ -1510,6 +1511,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
bool is_global = instr->intrinsic == nir_intrinsic_load_global ||
instr->intrinsic == nir_intrinsic_load_global_constant;
bool is_shared = instr->intrinsic == nir_intrinsic_load_shared;
+ bool is_scratch = instr->intrinsic == nir_intrinsic_load_scratch;
bool is_flat = instr->intrinsic == nir_intrinsic_load_input;
bool is_kernel = instr->intrinsic == nir_intrinsic_load_kernel_input;
bool is_interp = instr->intrinsic == nir_intrinsic_load_interpolated_input;
@@ -1523,7 +1525,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
t = nir_alu_type_get_base_type(t);
- if (!(is_ubo || is_global)) {
+ if (!(is_ubo || is_global || is_scratch)) {
offset = nir_intrinsic_base(instr);
}
@@ -1554,7 +1556,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
uint32_t uindex = nir_src_as_uint(index) + 1;
emit_ubo_read(ctx, &instr->instr, reg, offset, indirect_offset, 0, uindex);
- } else if (is_global || is_shared) {
+ } else if (is_global || is_shared || is_scratch) {
unsigned seg = is_global ? LDST_GLOBAL : (is_shared ? LDST_SHARED : LDST_SCRATCH);
emit_global(ctx, &instr->instr, true, reg, src_offset, seg);
} else if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_blend) {
@@ -1780,6 +1782,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_store_global:
case nir_intrinsic_store_shared:
+ case nir_intrinsic_store_scratch:
reg = nir_src_index(ctx, &instr->src[0]);
emit_explicit_constant(ctx, reg, reg);
@@ -1788,6 +1791,8 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
seg = LDST_GLOBAL;
else if (instr->intrinsic == nir_intrinsic_store_shared)
seg = LDST_SHARED;
+ else
+ seg = LDST_SCRATCH;
emit_global(ctx, &instr->instr, false, reg, &instr->src[1], seg);
break;
@@ -2876,6 +2881,7 @@ midgard_compile_shader_nir(void *mem_ctx, nir_shader *nir,
panfrost_nir_assign_sysvals(&ctx->sysvals, ctx, nir);
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
+ ctx->tls_size = nir->scratch_size;
nir_foreach_function(func, nir) {
if (!func->impl)
diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 22477014a22..04be27da50e 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -1054,7 +1054,7 @@ mir_ra(compiler_context *ctx)
/* Report spilling information. spill_count is in 128-bit slots (vec4 x
* fp32), but tls_size is in bytes, so multiply by 16 */
- ctx->tls_size = spill_count * 16;
+ ctx->tls_size += spill_count * 16;
install_registers(ctx, l);