diff options
author | Danylo Piliaiev <dpiliaiev@igalia.com> | 2021-12-07 16:43:21 +0200 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-01-07 15:29:23 +0000 |
commit | 94fc6b252c564002d575bb664e58780c4f56ccaf (patch) | |
tree | 4f5d257cc4abac0899d212cf2c34d96c1cd1e1fe | |
parent | 57c3e07f96d3a8f6483f680b67aa54755b38f00f (diff) |
ir3: Assert that we cannot have enough concurrent waves for CS with barrier [marge_bot_batch_merge_job]
If we have a compute shader that has a big workgroup, a barrier, and
a branchstack which limits max_waves, we may end up in a situation
where we cannot run all waves of the workgroup concurrently, which
would lead to a hang.
The blob just explodes in such a case.
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14110>
-rw-r--r-- | src/freedreno/ir3/ir3.c | 44 |
1 files changed, 31 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 45f26fbb5fc..ad67e695701 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -177,30 +177,48 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, const struct ir3_compiler *compiler = v->shader->compiler; unsigned max_waves = compiler->max_waves; + /* Compute the limit based on branchstack */ + if (v->branchstack > 0) { + unsigned branchstack_max_waves = compiler->branchstack_size / + v->branchstack * + compiler->wave_granularity; + max_waves = MIN2(max_waves, branchstack_max_waves); + } + /* If this is a compute shader, compute the limit based on shared size */ if ((v->type == MESA_SHADER_COMPUTE) || (v->type == MESA_SHADER_KERNEL)) { + unsigned threads_per_wg = + v->local_size[0] * v->local_size[1] * v->local_size[2]; + unsigned waves_per_wg = + DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base * + (double_threadsize ? 2 : 1) * + compiler->wave_granularity); + /* Shared is allocated in chunks of 1k */ unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024); if (shared_per_wg > 0 && !v->local_size_variable) { unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg; - unsigned threads_per_wg = - v->local_size[0] * v->local_size[1] * v->local_size[2]; - unsigned waves_per_wg = - DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base * - (double_threadsize ? 
2 : 1) * - compiler->wave_granularity); + max_waves = MIN2(max_waves, waves_per_wg * wgs_per_core * compiler->wave_granularity); } - } - /* Compute the limit based on branchstack */ - if (v->branchstack > 0) { - unsigned branchstack_max_waves = compiler->branchstack_size / - v->branchstack * - compiler->wave_granularity; - max_waves = MIN2(max_waves, branchstack_max_waves); + /* If we have a compute shader that has a big workgroup, a barrier, and + * a branchstack which limits max_waves - this may result in a situation + * when we cannot run concurrently all waves of the workgroup, which + * would lead to a hang. + * + * TODO: Could we spill branchstack or is there other way around? + * Blob just explodes in such case. + */ + if (v->has_barrier && (max_waves < waves_per_wg)) { + mesa_loge( + "Compute shader (%s:%s) which has workgroup barrier cannot be used " + "because it's impossible to have enough concurrent waves.", + v->shader->nir->info.name, v->shader->nir->info.label); + exit(1); + } } return max_waves; |