radeonsi: do compilation from si_create_shader_selector asynchronously

Main shader parts and geometry shaders are compiled asynchronously by util_queue. si_create_shader_selector doesn't wait and returns. si_draw_vbo(si_shader_select) waits for completion. This has the best effect when shaders are compiled at app-loading time. It doesn't help much for shaders compiled on demand, even though VS+PS compilation should take as much time as the longer of the two. If an app creates more shaders, at most 4 threads will be used to compile them. Debug output disables this for shader stats to be printed in the correct order. (We could go even further and build variants asynchronously too, then emit draw calls without waiting and emit incomplete shader states, then force IB chaining to give the compiler more time, then sync the compilation at the IB flush and patch the IB with correct shader states. This is great for compilation before draw calls, but there are some difficulties such as scratch and tess states requiring the compiler output, and an on-disk shader cache will likely be a much better and simpler solution.) Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
author: Marek Olšák <marek.olsak@amd.com> 2016-06-11 19:57:40 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2016-07-05 00:47:13 +0200
commit: 5c92c21369ee3b4f52eb5aed183092ba3ee7e079 (patch)
tree: d1464436b7410d1169ffd0a8e003db54f8c9e422 /src/gallium/drivers/radeonsi/si_pipe.c
parent: 84824935cf28b72bac9f73787aadf20b95dea230 (diff)
1 files changed, 18 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 06b32db43db..ee97bcfaea5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -663,6 +663,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 	if (!sscreen->b.ws->unref(sscreen->b.ws))
 		return;
 
+	if (util_queue_is_initialized(&sscreen->shader_compiler_queue))
+		util_queue_destroy(&sscreen->shader_compiler_queue);
+
+	for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
+		if (sscreen->tm[i])
+			LLVMDisposeTargetMachine(sscreen->tm[i]);
+
 	/* Free shader parts. */
 	for (i = 0; i < ARRAY_SIZE(parts); i++) {
 		while (parts[i]) {
@@ -710,6 +717,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 {
 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
+	unsigned num_cpus, num_compiler_threads, i;
 
 	if (!sscreen) {
 		return NULL;
@@ -754,6 +762,16 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
 		sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
 
+	/* Only enable as many threads as we have target machines and CPUs. */
+	num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
+
+	for (i = 0; i < num_compiler_threads; i++)
+		sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+
+	util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
+                        32, num_compiler_threads);
+
 	/* Create the auxiliary context. This must be done last. */
 	sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0);
author	Marek Olšák <marek.olsak@amd.com>	2016-06-11 19:57:40 +0200
committer	Marek Olšák <marek.olsak@amd.com>	2016-07-05 00:47:13 +0200
commit	5c92c21369ee3b4f52eb5aed183092ba3ee7e079 (patch)
tree	d1464436b7410d1169ffd0a8e003db54f8c9e422 /src/gallium/drivers/radeonsi/si_pipe.c
parent	84824935cf28b72bac9f73787aadf20b95dea230 (diff)