summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Connor Abbott <cwabbott0@gmail.com> 2020-11-10 17:59:03 +0100
committer Connor Abbott <cwabbott0@gmail.com> 2021-04-15 16:05:11 +0200
commit c68ea960a781f1e59e906eb9c1a82330db7f2c9c (patch)
tree 4488a10ca5babbffebfc44487e0f1a92dd12a17a
parent 8f54028479b691a217128f6154d8b641224b8634 (diff)
ir3, tu: Add compiler flag for robust UBO behavior
This needs to be part of the compiler because it's the only piece that we always have access to in all the places ir3_optimize_loop() is called, and it's only enabled for the whole Vulkan device. Right now it's just used for constraining vectorization, but the next commit adds another use. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7573>
-rw-r--r-- src/freedreno/computerator/a6xx.c 2
-rw-r--r-- src/freedreno/ir3/ir3_compiler.c 3
-rw-r--r-- src/freedreno/ir3/ir3_compiler.h 11
-rw-r--r-- src/freedreno/ir3/ir3_disk_cache.c 4
-rw-r--r-- src/freedreno/ir3/ir3_nir.c 14
-rw-r--r-- src/freedreno/ir3/ir3_nir.h 2
-rw-r--r-- src/freedreno/ir3/tests/delay.c 2
-rw-r--r-- src/freedreno/ir3/tests/disasm.c 2
-rw-r--r-- src/freedreno/vulkan/tu_device.c 9
-rw-r--r-- src/freedreno/vulkan/tu_shader.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_gallium.c 2
12 files changed, 37 insertions, 18 deletions
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c
index 90889ae91c4..5679f212111 100644
--- a/src/freedreno/computerator/a6xx.c
+++ b/src/freedreno/computerator/a6xx.c
@@ -490,7 +490,7 @@ a6xx_init(struct fd_device *dev, uint32_t gpu_id)
.read_perfcntrs = a6xx_read_perfcntrs,
};
- a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id);
+ a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id, false);
a6xx_backend->dev = dev;
a6xx_backend->control_mem = fd_bo_new(dev, 0x1000,
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index ed8b43364c5..41847e1db55 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -63,7 +63,7 @@ ir3_compiler_destroy(struct ir3_compiler *compiler)
}
struct ir3_compiler *
-ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
+ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id, bool robust_ubo_access)
{
struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
@@ -77,6 +77,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
compiler->dev = dev;
compiler->gpu_id = gpu_id;
+ compiler->robust_ubo_access = robust_ubo_access;
compiler->set = ir3_ra_alloc_reg_set(compiler, false);
/* All known GPU's have 32k local memory (aka shared) */
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 6f7058f37e5..2366bf6a7ac 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -44,6 +44,11 @@ struct ir3_compiler {
struct disk_cache *disk_cache;
+ /* If true, UBO accesses are assumed to be bounds-checked as defined by
+ * VK_EXT_robustness2 and optimizations may have to be more conservative.
+ */
+ bool robust_ubo_access;
+
/*
* Configuration options for things that are handled differently on
* different generations:
@@ -153,7 +158,8 @@ struct ir3_compiler {
};
void ir3_compiler_destroy(struct ir3_compiler *compiler);
-struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id);
+struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,
+ bool robust_ubo_access);
void ir3_disk_cache_init(struct ir3_compiler *compiler);
void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
@@ -190,6 +196,9 @@ enum ir3_shader_debug {
/* DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
IR3_DBG_RAMSGS = BITFIELD_BIT(21),
+
+ /* Only used for the disk-caching logic: */
+ IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
};
extern enum ir3_shader_debug ir3_shader_debug;
diff --git a/src/freedreno/ir3/ir3_disk_cache.c b/src/freedreno/ir3/ir3_disk_cache.c
index 29a2c8c2157..7a5f88cf5f8 100644
--- a/src/freedreno/ir3/ir3_disk_cache.c
+++ b/src/freedreno/ir3/ir3_disk_cache.c
@@ -67,7 +67,9 @@ ir3_disk_cache_init(struct ir3_compiler *compiler)
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
- const uint64_t driver_flags = ir3_shader_debug;
+ uint64_t driver_flags = ir3_shader_debug;
+ if (compiler->robust_ubo_access)
+ driver_flags |= IR3_DBG_ROBUST_UBO_ACCESS;
compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
}
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 759b2ccc18f..e3bf7c9b79c 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -190,7 +190,7 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
void
-ir3_optimize_loop(nir_shader *s)
+ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
{
bool progress;
unsigned lower_flrp =
@@ -227,7 +227,7 @@ ir3_optimize_loop(nir_shader *s)
nir_load_store_vectorize_options vectorize_opts = {
.modes = nir_var_mem_ubo,
.callback = ir3_nir_should_vectorize_mem,
- .robust_modes = 0,
+ .robust_modes = compiler->robust_ubo_access ? nir_var_mem_ubo : 0,
};
progress |= OPT(s, nir_opt_load_store_vectorize, &vectorize_opts);
@@ -315,7 +315,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
if (compiler->gpu_id < 500)
OPT_V(s, ir3_nir_lower_tg4_to_tex);
- ir3_optimize_loop(s);
+ ir3_optimize_loop(compiler, s);
/* do idiv lowering after first opt loop to get a chance to propagate
* constants for divide by immed power-of-two:
@@ -327,7 +327,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options);
if (idiv_progress)
- ir3_optimize_loop(s);
+ ir3_optimize_loop(compiler, s);
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
@@ -375,7 +375,7 @@ ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s)
*/
OPT_V(s, ir3_nir_apply_trig_workarounds);
- ir3_optimize_loop(s);
+ ir3_optimize_loop(compiler, s);
}
static bool
@@ -523,14 +523,14 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
if (progress)
- ir3_optimize_loop(s);
+ ir3_optimize_loop(so->shader->compiler, s);
/* Fixup indirect load_uniform's which end up with a const base offset
* which is too large to encode. Do this late(ish) so we actually
* can differentiate indirect vs non-indirect.
*/
if (OPT(s, ir3_nir_fixup_load_uniform))
- ir3_optimize_loop(s);
+ ir3_optimize_loop(so->shader->compiler, s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 17dc4aa155c..76eef3b2646 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -52,7 +52,7 @@ void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, u
void ir3_nir_lower_gs(nir_shader *shader);
const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
-void ir3_optimize_loop(nir_shader *s);
+void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
diff --git a/src/freedreno/ir3/tests/delay.c b/src/freedreno/ir3/tests/delay.c
index 5ffc688c32b..ef6cd555ef2 100644
--- a/src/freedreno/ir3/tests/delay.c
+++ b/src/freedreno/ir3/tests/delay.c
@@ -181,7 +181,7 @@ main(int argc, char **argv)
struct ir3_compiler *c;
int result = 0;
- c = ir3_compiler_create(NULL, 630);
+ c = ir3_compiler_create(NULL, 630, false);
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
const struct test *test = &tests[i];
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index bd4bff47d46..5d6d052f7fb 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -393,7 +393,7 @@ main(int argc, char **argv)
unsigned gen = test->gpu_id / 100;
if (!compilers[gen]) {
- compilers[gen] = ir3_compiler_create(NULL, test->gpu_id);
+ compilers[gen] = ir3_compiler_create(NULL, test->gpu_id, false);
}
FILE *fasm = fmemopen((void *)test->expected, strlen(test->expected), "r");
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 19229294315..aa13ddea071 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -1078,6 +1078,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
struct tu_device *device;
bool custom_border_colors = false;
bool perf_query_pools = false;
+ bool robust_buffer_access2 = false;
/* Check enabled features */
if (pCreateInfo->pEnabledFeatures) {
@@ -1110,6 +1111,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
perf_query_pools = feature->performanceCounterQueryPools;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
+ VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
+ robust_buffer_access2 = features->robustBufferAccess2;
+ break;
+ }
default:
break;
}
@@ -1166,7 +1172,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
}
}
- device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id);
+ device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id,
+ robust_buffer_access2);
if (!device->compiler) {
result = vk_startup_errorf(physical_device->instance,
VK_ERROR_INITIALIZATION_FAILED,
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index 481d4568b1d..6c4eff30ce3 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -192,7 +192,7 @@ tu_spirv_to_nir(struct tu_device *dev,
NIR_PASS_V(nir, nir_lower_frexp);
- ir3_optimize_loop(nir);
+ ir3_optimize_loop(dev->compiler, nir);
return nir;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index 1989ba2d0b0..ea8a26c19c1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -362,7 +362,7 @@ main(int argc, char **argv)
nir_shader *nir;
- compiler = ir3_compiler_create(NULL, gpu_id);
+ compiler = ir3_compiler_create(NULL, gpu_id, false);
if (from_tgsi) {
struct tgsi_token toks[65536];
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 18e3a860ef5..65e13efac87 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -515,7 +515,7 @@ ir3_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
- screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id);
+ screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id, false);
/* TODO do we want to limit things to # of fast cores, or just limit
* based on total # of both big and little cores. The little cores