summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-01-28 01:29:59 +0100
committerMarek Olšák <marek.olsak@amd.com>2016-02-21 21:08:58 +0100
commit9aaf28da629e025f652c7ff63750ad8ee513ff42 (patch)
treeb03eb8a6f38fbbc9e15f22ba0ec7fdbf635b2da3
parent754cf171e90cc61d135e7c45f8be319ee2db02a5 (diff)
radeonsi: enable compiling one variant per shader
Shader stats from VERDE: Default scheduler: Totals: SGPRS: 491272 -> 488672 (-0.53 %) VGPRS: 289980 -> 311093 (7.28 %) Code Size: 11091656 -> 11219948 (1.16 %) bytes LDS: 97 -> 97 (0.00 %) blocks Scratch: 1732608 -> 2246656 (29.67 %) bytes per wave Max Waves: 78063 -> 77352 (-0.91 %) Wait states: 0 -> 0 (0.00 %) Looking at some of the worst regressions, I get: - The VGPR increase seems to be caused by the fact that if PS has used less than 16 VGPRs, now it will always use 16 VGPRs and sometimes even 20. However, the wave count remains at 10 if VGPRs <= 24, so no harm there. - The scratch increase seems to be caused by SGPR spilling. The unnecessary SGPR spilling has been an ongoing issue with the compiler and it's completely fixable by rematerializing s_loads or reordering instructions. SI scheduler: Totals: SGPRS: 374848 -> 374576 (-0.07 %) VGPRS: 284456 -> 307515 (8.11 %) Code Size: 11433068 -> 11535452 (0.90 %) bytes LDS: 97 -> 97 (0.00 %) blocks Scratch: 509952 -> 522240 (2.41 %) bytes per wave Max Waves: 79456 -> 78217 (-1.56 %) Wait states: 0 -> 0 (0.00 %) VGPRs - same story as before. The SI scheduler doesn't spill SGPRs so much and generally spills way less than the default scheduler. (522240 spills vs 2246656 spills) Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h1
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c4
3 files changed, 5 insertions, 1 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 324d2719f44..ea028272ccd 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -411,6 +411,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
+ { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
DEBUG_NAMED_VALUE_END /* must be last */
};
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index e92df876c22..ee173d35880 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -89,6 +89,7 @@
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
#define DBG_SI_SCHED (1llu << 46)
+#define DBG_MONOLITHIC_SHADERS (1llu << 47)
#define R600_MAP_BUFFER_ALIGNMENT 64
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8bfaf85df5b..30f3ec0753a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -623,7 +623,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;
pipe_mutex_init(sscreen->shader_parts_mutex);
- sscreen->use_monolithic_shaders = true;
+ sscreen->use_monolithic_shaders =
+ HAVE_LLVM < 0x0308 ||
+ (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;