summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason Ekstrand <jason@jlekstrand.net> 2020-08-11 10:30:42 -0500
committer Marge Bot <eric+marge@anholt.net> 2020-08-12 10:11:06 +0000
commit 65eeb06a7f7afd1fbf48490f06051dfad9de3214 (patch)
tree 722a1dd511ed75cb707d4bc522b5b00ef03e63d4
parent f5e7be386ffa8d5805fd0381ee0c921af65a6bcb (diff)
iris: Upload kernel inputs with system values
Clover doesn't upload a cbuf0 but instead provides the kernel inputs as part of the pipe_grid. The most obvious thing to do is to upload them along with system values.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6280>
-rw-r--r-- src/gallium/drivers/iris/iris_context.h 3
-rw-r--r-- src/gallium/drivers/iris/iris_disk_cache.c 7
-rw-r--r-- src/gallium/drivers/iris/iris_program.c 43
-rw-r--r-- src/gallium/drivers/iris/iris_state.c 25
4 files changed, 54 insertions, 24 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 1cdb035cfe5..8dc64f5d4be 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -378,6 +378,9 @@ struct iris_uncompiled_shader {
/* Whether shader uses atomic operations. */
bool uses_atomic_load_store;
+ /** Size (in bytes) of the kernel input data */
+ unsigned kernel_input_size;
+
/** Constant data scraped from the shader by nir_opt_large_constants */
struct pipe_resource *const_data;
diff --git a/src/gallium/drivers/iris/iris_disk_cache.c b/src/gallium/drivers/iris/iris_disk_cache.c
index 4913f309d0c..0383512b295 100644
--- a/src/gallium/drivers/iris/iris_disk_cache.c
+++ b/src/gallium/drivers/iris/iris_disk_cache.c
@@ -106,8 +106,9 @@ iris_disk_cache_store(struct disk_cache *cache,
* 2. Assembly code
* 3. Number of entries in the system value array
* 4. System value array
- * 5. Legacy param array (only used for compute workgroup ID)
- * 6. Binding table
+ * 5. Size (in bytes) of kernel inputs
+ * 6. Legacy param array (only used for compute workgroup ID)
+ * 7. Binding table
*/
blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
@@ -222,7 +223,7 @@ iris_disk_cache_retrieve(struct iris_context *ice,
if (num_cbufs || ish->nir->num_uniforms)
num_cbufs++;
- if (num_system_values)
+ if (num_system_values || kernel_input_size)
num_cbufs++;
assert(stage < ARRAY_SIZE(cache_id_for_stage));
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 8470022eb9d..7d63bc185e0 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -377,12 +377,15 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
void *mem_ctx,
nir_shader *nir,
struct brw_stage_prog_data *prog_data,
+ unsigned kernel_input_size,
enum brw_param_builtin **out_system_values,
unsigned *out_num_system_values,
unsigned *out_num_cbufs)
{
UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
+ unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
+
const unsigned IRIS_MAX_SYSTEM_VALUES =
PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
enum brw_param_builtin *system_values =
@@ -460,7 +463,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
}
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ ucp_idx[ucp] * sizeof(uint32_t));
break;
}
case nir_intrinsic_load_patch_vertices_in:
@@ -471,7 +475,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ patch_vert_idx * sizeof(uint32_t));
break;
case nir_intrinsic_image_deref_load_param_intel: {
assert(devinfo->gen < 9);
@@ -512,7 +517,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
b.cursor = nir_before_instr(instr);
offset = nir_iadd(&b,
get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
- nir_imm_int(&b, img_idx[var->data.binding] * 4 +
+ nir_imm_int(&b, system_values_start +
+ img_idx[var->data.binding] * 4 +
nir_intrinsic_base(intrin) * 16));
break;
}
@@ -528,7 +534,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
}
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, variable_group_size_idx * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ variable_group_size_idx * sizeof(uint32_t));
+ break;
+ }
+ case nir_intrinsic_load_kernel_input: {
+ assert(nir_intrinsic_base(intrin) +
+ nir_intrinsic_range(intrin) <= kernel_input_size);
+ b.cursor = nir_before_instr(instr);
+ offset = nir_iadd_imm(&b, intrin->src[0].ssa,
+ nir_intrinsic_base(intrin));
break;
}
default:
@@ -562,7 +577,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
num_cbufs++;
/* Place the new params in a new cbuf. */
- if (num_system_values > 0) {
+ if (num_system_values > 0 || kernel_input_size > 0) {
unsigned sysval_cbuf_index = num_cbufs;
num_cbufs++;
@@ -1101,7 +1116,7 @@ iris_compile_vs(struct iris_context *ice,
prog_data->use_alt_mode = ish->use_alt_mode;
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
@@ -1281,7 +1296,7 @@ iris_compile_tcs(struct iris_context *ice,
if (ish) {
nir = nir_shader_clone(mem_ctx, ish->nir);
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
num_system_values, num_cbufs);
@@ -1435,7 +1450,7 @@ iris_compile_tes(struct iris_context *ice,
nir_shader_gather_info(nir, impl);
}
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
@@ -1557,7 +1572,7 @@ iris_compile_gs(struct iris_context *ice,
nir_shader_gather_info(nir, impl);
}
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
@@ -1665,7 +1680,7 @@ iris_compile_fs(struct iris_context *ice,
prog_data->use_alt_mode = ish->use_alt_mode;
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
/* Lower output variables to load_output intrinsics before setting up
@@ -1964,8 +1979,9 @@ iris_compile_cs(struct iris_context *ice,
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
- &num_system_values, &num_cbufs);
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
+ ish->kernel_input_size,
+ &system_values, &num_system_values, &num_cbufs);
struct iris_binding_table bt;
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
@@ -1992,7 +2008,7 @@ iris_compile_cs(struct iris_context *ice,
struct iris_compiled_shader *shader =
iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
prog_data, NULL, system_values, num_system_values,
- 0, num_cbufs, &bt);
+ ish->kernel_input_size, num_cbufs, &bt);
iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
@@ -2401,6 +2417,7 @@ iris_create_compute_state(struct pipe_context *ctx,
struct iris_uncompiled_shader *ish =
iris_create_uncompiled_shader(ctx, nir, NULL);
+ ish->kernel_input_size = state->req_input_mem;
// XXX: disallow more than 64KB of shared variables
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 5837d219356..e9f391d5a5c 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -3221,26 +3221,35 @@ iris_set_constant_buffer(struct pipe_context *ctx,
static void
upload_sysvals(struct iris_context *ice,
- gl_shader_stage stage)
+ gl_shader_stage stage,
+ const struct pipe_grid_info *grid)
{
UNUSED struct iris_genx_state *genx = ice->state.genx;
struct iris_shader_state *shs = &ice->state.shaders[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
- if (!shader || shader->num_system_values == 0)
+ if (!shader || (shader->num_system_values == 0 &&
+ shader->kernel_input_size == 0))
return;
assert(shader->num_cbufs > 0);
unsigned sysval_cbuf_index = shader->num_cbufs - 1;
struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
- unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
- uint32_t *map = NULL;
+ unsigned system_values_start =
+ ALIGN(shader->kernel_input_size, sizeof(uint32_t));
+ unsigned upload_size = system_values_start +
+ shader->num_system_values * sizeof(uint32_t);
+ void *map = NULL;
assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
- &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
+ &cbuf->buffer_offset, &cbuf->buffer, &map);
+ if (shader->kernel_input_size > 0)
+ memcpy(map, grid->input, shader->kernel_input_size);
+
+ uint32_t *sysval_map = map + system_values_start;
for (int i = 0; i < shader->num_system_values; i++) {
uint32_t sysval = shader->system_values[i];
uint32_t value = 0;
@@ -3289,7 +3298,7 @@ upload_sysvals(struct iris_context *ice,
assert(!"unhandled system value");
}
- *map++ = value;
+ *sysval_map++ = value;
}
cbuf->buffer_size = upload_size;
@@ -5641,7 +5650,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
continue;
if (shs->sysvals_need_upload)
- upload_sysvals(ice, stage);
+ upload_sysvals(ice, stage, NULL);
struct push_bos push_bos = {};
setup_constant_buffers(ice, batch, stage, &push_bos);
@@ -6790,7 +6799,7 @@ iris_upload_compute_state(struct iris_context *ice,
if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
shs->sysvals_need_upload)
- upload_sysvals(ice, MESA_SHADER_COMPUTE);
+ upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);