summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Emma Anholt <emma@anholt.net> 2021-12-06 12:11:43 -0800
committer: Marge Bot <emma+marge@anholt.net> 2021-12-09 22:15:53 +0000
commit: 7d2ea9b0edef2176140629ac3dee6a6809c4abe2 (patch)
tree: 7857a0a27a8ab41487e41efe46b33888e4594b05
parent: e68a9b033997c9de485c2914717d25e55fbb053e (diff)
r300: Request NIR shaders from mesa/st and use NIR-to-TGSI.
This brings us into parity on state tracker paths with most other supported
drivers, and a lot of additional optimization on our shaders.

Results on a subset of shader-db that doesn't crash:

instructions in affected programs: 59502 -> 47991 (-19.35%)
vinst in affected programs: 17633 -> 15197 (-13.82%)
sinst in affected programs: 9296 -> 7319 (-21.27%)
flowcontrol in affected programs: 627 -> 310 (-50.56%)
presub in affected programs: 4220 -> 1554 (-63.18%)
temps in affected programs: 5775 -> 8570 (48.40%)
lits in affected programs: 215 -> 37 (-82.79%)

The temps (register usage) increase is unfortunate, but it seems that
instruction counts tend to be our limit before reg counts are.

Fixes: #3354
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14096>
-rw-r--r-- src/gallium/drivers/r300/ci/r300-rv515-fails.txt 96
-rw-r--r-- src/gallium/drivers/r300/ci/r300-rv515-skips.txt 3
-rw-r--r-- src/gallium/drivers/r300/meson.build 2
-rw-r--r-- src/gallium/drivers/r300/r300_debug.c 1
-rw-r--r-- src/gallium/drivers/r300/r300_screen.c 131
-rw-r--r-- src/gallium/drivers/r300/r300_screen.h 2
-rw-r--r-- src/gallium/drivers/r300/r300_state.c 19
7 files changed, 179 insertions, 75 deletions
diff --git a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
index fa875f06d89..c395c114c9e 100644
--- a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
@@ -36,45 +36,14 @@ dEQP-GLES2.functional.rasterization.primitives.lines_wide,Fail
dEQP-GLES2.functional.rasterization.primitives.line_strip_wide,Fail
dEQP-GLES2.functional.rasterization.primitives.line_loop_wide,Fail
-# "Unknown opcode IF"
dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.functions.control_flow.return_in_nested_loop_vertex,Fail
-dEQP-GLES2.functional.shaders.functions.control_flow.return_in_loop_if_vertex,Fail
-# "Ran out of temporary registers"
+# "No free temporary to use for predicate stack counter."
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail
-# "Vertex program has too many instructions"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_read_vertex,Fail
-
-# "emit_alu: Too many instructions"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_loop_read_fragment,Fail
-
-# "Ran out of temporary registers"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_vertex,Fail
-
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_dynamic_read_fragment,Fail
-
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_dynamic_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_static_loop_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_static_read_vertex,Fail
@@ -141,18 +110,34 @@ dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscr
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscript_write_static_loop_subscript_read_vertex,Fail
dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscript_write_dynamic_loop_subscript_read_vertex,Fail
+# "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1"
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_static_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_dynamic_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_static_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_dynamic_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_subscript_read_fragment,Fail
+
# Bus error
dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.sequence_vertex,Crash
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.do_while_uniform_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.while_constant_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.sequence_vertex,Crash
dEQP-GLES2.functional.shaders.loops.while_uniform_iterations.sequence_vertex,Crash
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.conditional_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.double_continue_vertex,Fail
dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.mixed_break_continue_vertex,Fail
dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.conditional_continue_vertex,Fail
dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.double_continue_vertex,Fail
@@ -163,24 +148,10 @@ dEQP-GLES2.functional.shaders.loops.do_while_uniform_iterations.mixed_break_cont
dEQP-GLES2.functional.shaders.loops.for_constant_iterations.mixed_break_continue_vertex,Fail
dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.mixed_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.unconditional_break_vertex,Fail
dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.mixed_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_constant_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_uniform_iterations.unconditional_break_vertex,Fail
-# "Unknown opcode IF"
-dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_always_vertex,Fail
dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_dynamic_vertex,Fail
-dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_always_fragment,Fail
-
-# FS: POW channel looks good, the rest got trashed though?
-dEQP-GLES2.functional.shaders.operator.exponential.pow.highp_float_fragment,Fail
-dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_float_fragment,Fail
-
-dEQP-GLES2.functional.shaders.random.swizzle.fragment.24,Fail
dEQP-GLES2.functional.shaders.random.texture.fragment.141,Fail
# VS: Only the first channel of a POW result is right it looks like.
@@ -191,17 +162,9 @@ dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec2_vertex,Fail
dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec3_vertex,Fail
dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec4_vertex,Fail
-# "No free temporary to use for predicate stack counter."
dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_struct_array_vertex,Fail
-dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_vertex,Fail
-
-# "Rewrite of inst 1 failed Can't allocate source for Inst 17 src_type=1 new_index=1 new_mask=2"
-dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_struct_array_fragment,Fail
dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_fragment,Fail
-dEQP-GLES2.functional.shaders.struct.uniform.dynamic_loop_struct_array_vertex,Fail
-dEQP-GLES2.functional.shaders.struct.uniform.dynamic_loop_struct_array_fragment,Fail
-
dEQP-GLES2.functional.texture.format.a8_cube_npot,Fail
dEQP-GLES2.functional.texture.format.l8_cube_npot,Fail
dEQP-GLES2.functional.texture.format.la88_cube_npot,Fail
@@ -253,15 +216,8 @@ dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_2,
dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_4,Fail
dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_8,Fail
-# "emit_tex: Too many instructionsUsing a dummy shader instead."
-dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.nested_structs_arrays.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.nested_structs_arrays.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_both,Fail
+dEQP-GLES2.functional.uniform_api.value.initial.render.basic_array.vec4_both,Fail
+dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.float_vec4_both,Fail
KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail
KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component24,Fail
@@ -566,6 +522,12 @@ spec@arb_color_buffer_float@gl_rgba32f-render,Fail
spec@arb_color_buffer_float@gl_rgba8_snorm-render-sanity,Fail
spec@arb_framebuffer_object@fbo-blit-stretch,Fail
spec@arb_framebuffer_object@fbo-drawbuffers-none glclear,Fail
+
+# The test accidentally assigns a varying output instead of an attribute input to
+# gl_Position, then it gets optimized out and then set_vertex_inputs_outputs gets
+# angry that nobody set gl_Position.
+spec@arb_separate_shader_objects@getprogrampipelineiv,Crash
+
spec@arb_shader_texture_lod@execution@tex-miplevel-selection *gradarb 2d,Fail
spec@arb_shader_texture_lod@execution@tex-miplevel-selection *lod 2dshadow,Fail
spec@arb_shader_texture_lod@execution@tex-miplevel-selection *projgradarb 2d,Fail
@@ -601,8 +563,8 @@ spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-ve
spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-vertex,Fail
spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-smooth-vertex,Fail
spec@glsl-1.10@execution@loops@glsl-vs-continue-inside-do-while,Fail
-spec@glsl-1.10@execution@vs-loop-complex-unroll-nested-break,Fail
spec@glsl-1.20@execution@clipping@vs-clip-vertex-primitives,Fail
+spec@glsl-1.20@execution@fs-function-inout-array-of-structs,Fail
spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1dshadow,Fail
spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1d,Fail
spec@glsl-1.20@execution@vs-nan-builtin-max,Fail
diff --git a/src/gallium/drivers/r300/ci/r300-rv515-skips.txt b/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
index 86ef027fc02..c309d409ab2 100644
--- a/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
@@ -8,3 +8,6 @@ shaders@glsl-predication-on-large-array
# I'm running it with gl_rgba8_snorm
glx@
+
+# GPU hang
+dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_vertex
diff --git a/src/gallium/drivers/r300/meson.build b/src/gallium/drivers/r300/meson.build
index 41e09bd56fc..226c374e477 100644
--- a/src/gallium/drivers/r300/meson.build
+++ b/src/gallium/drivers/r300/meson.build
@@ -126,7 +126,7 @@ libr300 = static_library(
inc_mesa,
],
gnu_symbol_visibility : 'hidden',
- dependencies : [dep_libdrm_radeon, dep_llvm, idep_mesautil],
+ dependencies : [dep_libdrm_radeon, dep_llvm, idep_mesautil, idep_nir],
)
driver_r300 = declare_dependency(
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index c86577cd251..bb595b26b1a 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -50,6 +50,7 @@ static const struct debug_named_value r300_debug_options[] = {
{ "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
{ "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
{ "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
+ { "use_tgsi", DBG_USE_TGSI, "Request TGSI shaders from the state tracker" },
/* must be last */
DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 56353c47720..89502d46ff6 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -21,6 +21,7 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "compiler/nir/nir.h"
#include "util/format/u_format.h"
#include "util/format/u_format_s3tc.h"
#include "util/u_screen.h"
@@ -315,9 +316,9 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_TGSI;
+ return (r300screen->debug & DBG_USE_TGSI) ? PIPE_SHADER_IR_TGSI : PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return 1 << PIPE_SHADER_IR_TGSI;
+ return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
}
break;
case PIPE_SHADER_VERTEX:
@@ -391,9 +392,9 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_TGSI;
+ return (r300screen->debug & DBG_USE_TGSI) ? PIPE_SHADER_IR_TGSI : PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
- return 1 << PIPE_SHADER_IR_TGSI;
+ return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
}
break;
default:
@@ -472,6 +473,127 @@ static int r300_get_video_param(struct pipe_screen *screen,
}
}
+static const nir_shader_compiler_options r500_vs_compiler_options = {
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ .lower_bitops = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_fdiv = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
+ .lower_fdph = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_fmod = true,
+ .lower_rotate = true,
+ .lower_uniforms_to_ubo = true,
+ .lower_vector_cmp = true,
+
+ /* Have HW loops support and 1024 max instr count, but don't unroll *too*
+ * hard.
+ */
+ .max_unroll_iterations = 32,
+
+ .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r500_fs_compiler_options = {
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ .lower_bitops = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_fdiv = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
+ .lower_fdph = true,
+ .lower_fpow = true, /* POW is only in the VS */
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_fmod = true,
+ .lower_rotate = true,
+ .lower_uniforms_to_ubo = true,
+ .lower_vector_cmp = true,
+
+ /* Have HW loops support and 512 max instr count, but don't unroll *too*
+ * hard.
+ */
+ .max_unroll_iterations = 32,
+
+ .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r300_vs_compiler_options = {
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ .lower_bitops = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_fdiv = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
+ .lower_fdph = true,
+ .lower_fsat = true, /* No fsat in pre-r500 VS */
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_fmod = true,
+ .lower_rotate = true,
+ .lower_uniforms_to_ubo = true,
+ .lower_vector_cmp = true,
+
+ /* Note: has HW loops support, but only 256 ALU instructions. */
+ .max_unroll_iterations = 32,
+
+ .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r300_fs_compiler_options = {
+ .fuse_ffma32 = true,
+ .fuse_ffma64 = true,
+ .lower_bitops = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_fdiv = true,
+ .lower_fpow = true, /* POW is only in the VS */
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
+ .lower_fdph = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_fmod = true,
+ .lower_rotate = true,
+ .lower_uniforms_to_ubo = true,
+ .lower_vector_cmp = true,
+
+ /* No HW loops support, so set it equal to ALU instr max */
+ .max_unroll_iterations = 64,
+
+ .use_interpolated_input_intrinsics = true,
+};
+
+static const void *
+r300_get_compiler_options(struct pipe_screen *pscreen,
+ enum pipe_shader_ir ir,
+ enum pipe_shader_type shader)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+
+ assert(ir == PIPE_SHADER_IR_NIR);
+
+ if (r300screen->caps.is_r500) {
+ if (shader == PIPE_SHADER_VERTEX)
+ return &r500_vs_compiler_options;
+ else
+ return &r500_fs_compiler_options;
+ } else {
+ if (shader == PIPE_SHADER_VERTEX)
+ return &r300_vs_compiler_options;
+ else
+ return &r300_fs_compiler_options;
+ }
+}
+
/**
* Whether the format matches:
* PIPE_FORMAT_?10?10?10?2_UNORM
@@ -734,6 +856,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
r300screen->screen.destroy = r300_destroy_screen;
r300screen->screen.get_name = r300_get_name;
r300screen->screen.get_vendor = r300_get_vendor;
+ r300screen->screen.get_compiler_options = r300_get_compiler_options;
r300screen->screen.get_device_vendor = r300_get_device_vendor;
r300screen->screen.get_disk_shader_cache = r300_get_disk_shader_cache;
r300screen->screen.get_param = r300_get_param;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 09332b3adcc..1fe9e861308 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -103,8 +103,8 @@ radeon_winsys(struct pipe_screen *screen) {
#define DBG_NO_ZMASK (1 << 21)
#define DBG_NO_HIZ (1 << 22)
#define DBG_NO_CMASK (1 << 23)
+#define DBG_USE_TGSI (1 << 24)
/*@}*/
-
static inline boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
{
return (screen->debug & flags) ? TRUE : FALSE;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 39c442802af..4c9df7588bb 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -46,6 +46,7 @@
#include "r300_fs.h"
#include "r300_texture.h"
#include "r300_vs.h"
+#include "nir/nir_to_tgsi.h"
/* r300_state: Functions used to initialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
@@ -1041,7 +1042,14 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
/* Copy state directly into shader. */
fs->state = *shader;
- fs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+ if (fs->state.type == PIPE_SHADER_IR_NIR) {
+ fs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
+ } else {
+ assert(fs->state.type == PIPE_SHADER_IR_TGSI);
+ /* we need to keep a local copy of the tokens */
+ fs->state.tokens = tgsi_dup_tokens(fs->state.tokens);
+ }
/* Precompile the fragment shader at creation time to avoid jank at runtime.
* In most cases we won't have anything in the key at draw time.
@@ -1925,7 +1933,14 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
/* Copy state directly into shader. */
vs->state = *shader;
- vs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+ if (vs->state.type == PIPE_SHADER_IR_NIR) {
+ vs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
+ } else {
+ assert(vs->state.type == PIPE_SHADER_IR_TGSI);
+ /* we need to keep a local copy of the tokens */
+ vs->state.tokens = tgsi_dup_tokens(vs->state.tokens);
+ }
if (r300->screen->caps.has_tcl) {
r300_init_vs_outputs(r300, vs);