diff options
Diffstat (limited to 'src/gallium/drivers/lima')
64 files changed, 3127 insertions, 1457 deletions
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt b/src/gallium/drivers/lima/ci/deqp-lima-fails.txt deleted file mode 100644 index 680b8f247fd..00000000000 --- a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt +++ /dev/null @@ -1,59 +0,0 @@ -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail -dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_neg_y_pos_z_and_pos_x_pos_y_neg_z,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_pos_y_pos_z_and_pos_x_neg_y_neg_z,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z,Fail -dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.1,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.12,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.13,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.14,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.15,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.16,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.17,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.18,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.19,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.2,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.20,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.21,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.22,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.23,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.24,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.3,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.4,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.5,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.6,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.7,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.8,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.random.9,Fail -dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.stencil,Fail -dEQP-GLES2.functional.negative_api.shader.uniform_matrixfv_invalid_transpose,Fail -dEQP-GLES2.functional.negative_api.texture.generatemipmap_zero_level_array_compressed,Fail -dEQP-GLES2.functional.shaders.builtin_variable.frontfacing,Fail -dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_dynamic_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_dynamic_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_static_loop_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_static_read_vertex,Fail -dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_dynamic_loop_read,Fail -dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_dynamic_read,Fail -dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_static_loop_read,Fail -dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_static_read,Fail -dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2d_bias,Fail -dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec4_bias,Fail -dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_clamp_rgba8888,Fail -dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_mirror_rgba8888,Fail -dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_clamp_rgba8888,Fail -dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_mirror_rgba8888,Fail -dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear,Fail -dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest,Fail -dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear,Fail -dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest,Fail -dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear,Fail -dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest,Fail diff --git a/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml b/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml new file mode 100644 index 00000000000..588edb95ab1 --- /dev/null +++ b/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml @@ -0,0 +1,35 @@ +[[deqp]] +deqp = "/deqp/modules/gles2/deqp-gles2" +caselists = ["/deqp/mustpass/gles2-main.txt"] +tests_per_group = 250 +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] +version_check = "GL ES 2.0.*git" +renderer_check = "Mali450" + +# wayland +[[deqp]] +deqp = "/deqp/modules/egl/deqp-egl-wayland" +caselists = ["/deqp/mustpass/egl-main.txt"] +tests_per_group = 250 +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] +prefix = "wayland-" + +# x11 +[[deqp]] +deqp = "/deqp/modules/egl/deqp-egl-x11" +caselists = ["/deqp/mustpass/egl-main.txt"] +tests_per_group = 250 +deqp_args = [ + "--deqp-surface-width=256", "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden", + "--deqp-gl-config-name=rgba8888d24s8ms0", +] +prefix = "x11-" diff --git a/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml b/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml new file mode 100644 index 00000000000..1cc878998b0 --- /dev/null +++ b/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml @@ -0,0 +1,4 @@ +[[piglit]] +piglit_folder = "/piglit" +profile = "gpu" +process_isolation = true diff --git a/src/gallium/drivers/lima/ci/deqp-lima-skips.txt b/src/gallium/drivers/lima/ci/deqp-lima-skips.txt deleted file mode 100644 index 824ea20af4c..00000000000 --- a/src/gallium/drivers/lima/ci/deqp-lima-skips.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Note: skips lists for CI are just a list of lines that, when -# non-zero-length and not starting with '#', will regex match to -# delete lines from the test list. Be careful. diff --git a/src/gallium/drivers/lima/ci/gitlab-ci.yml b/src/gallium/drivers/lima/ci/gitlab-ci.yml index ee42f3c6397..680ed8c1f5a 100644 --- a/src/gallium/drivers/lima/ci/gitlab-ci.yml +++ b/src/gallium/drivers/lima/ci/gitlab-ci.yml @@ -1,15 +1,43 @@ -lima-mali450-test:arm64: +.lima-rules: + stage: arm + rules: + - !reference [.test, rules] + - !reference [.lima-farm-rules, rules] + - !reference [.gl-rules, rules] + - changes: + - src/gallium/drivers/lima/**/* + - src/gallium/winsys/lima/**/* + - src/lima/**/* + when: on_success + +# 4 devices (2023-12-16) +.lava-meson-gxl-s805x-libretech-ac:arm64: + variables: + DEVICE_TYPE: meson-gxl-s805x-libretech-ac + DTB: meson-gxl-s805x-libretech-ac + FDO_CI_CONCURRENT: 4 + GPU_VERSION: lima + RUNNER_TAG: mesa-ci-x86-64-lava-lima + +lima-mali450-deqp:arm64: extends: - - .lava-test:arm64 + - .lava-test-deqp:arm64 - .lima-rules + - .lava-meson-gxl-s805x-libretech-ac:arm64 variables: - DEVICE_TYPE: meson-gxl-s805x-libretech-ac - DTB: ${DEVICE_TYPE} FDO_HTTP_CACHE_URI: '' - GPU_VERSION: lima - DEQP_PARALLEL: 4 - DEQP_EXPECTED_RENDERER: Mali450 + HWCI_START_WESTON: 1 + DEQP_SUITE: lima-mali450-deqp VISIBILITY_GROUP: "mesa-ci" - tags: - - mesa-ci-x86-64-lava-lima +lima-mali450-piglit:arm64: + extends: + - .lava-test-deqp:arm64 + - .lima-rules + - .lava-meson-gxl-s805x-libretech-ac:arm64 + variables: + DEQP_SUITE: lima-mali450-piglit + FDO_HTTP_CACHE_URI: '' + PIGLIT_PLATFORM: gbm + VISIBILITY_GROUP: "mesa-ci" + parallel: 2 diff --git a/src/gallium/drivers/lima/ci/lima-fails.txt b/src/gallium/drivers/lima/ci/lima-fails.txt new file mode 100644 index 00000000000..ca73d800d81 --- /dev/null +++ b/src/gallium/drivers/lima/ci/lima-fails.txt @@ -0,0 +1,635 @@ +dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_neg_y_pos_z_and_pos_x_pos_y_neg_z,Fail +dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_pos_y_pos_z_and_pos_x_neg_y_neg_z,Fail +dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z,Fail +dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.1,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.12,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.13,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.14,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.15,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.16,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.17,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.18,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.19,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.2,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.20,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.21,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.22,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.23,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.24,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.3,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.4,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.5,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.6,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.7,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.8,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.random.9,Fail +dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.stencil,Fail +dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2d_bias,Fail +dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec3_bias,Fail +dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec4_bias,Fail +dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_vertex,Fail +dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_vertex,Fail +dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_vertex,Fail +dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_clamp_rgba8888,Fail +dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_mirror_rgba8888,Fail +dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_clamp_rgba8888,Fail +dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_mirror_rgba8888,Fail +dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear,Fail +dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest,Fail +dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear,Fail +dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest,Fail +dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear,Fail +dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest,Fail + +wayland-dEQP-EGL.functional.create_context.no_config,Fail +wayland-dEQP-EGL.functional.image.modify.renderbuffer_depth16_renderbuffer_clear_depth,Fail +wayland-dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Fail +wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Fail +wayland-dEQP-EGL.functional.wide_color.window_fp16_default_colorspace,Fail + +x11-dEQP-EGL.functional.create_context.no_config,Fail +x11-dEQP-EGL.functional.image.modify.renderbuffer_depth16_renderbuffer_clear_depth,Fail +x11-dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Fail +x11-dEQP-EGL.functional.render.multi_context.gles2.rgba8888_pbuffer,Fail +x11-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Fail +x11-dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer,Fail +x11-dEQP-EGL.functional.wide_color.pbuffer_8888_colorspace_srgb,Fail +x11-dEQP-EGL.functional.wide_color.window_8888_colorspace_srgb,Fail + +shaders@glsl-arb-fragment-coord-conventions,Fail +shaders@glsl-bug-110796,Fail +shaders@glsl-fs-flat-color,Fail +shaders@glsl-predication-on-large-array,Fail +shaders@glsl-routing,Fail +spec@arb_color_buffer_float@gl_rgba8-render,Fail +spec@arb_color_buffer_float@gl_rgba8-render-sanity,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24 NPOT,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32 NPOT,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT,Fail +spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT NPOT,Fail +spec@arb_depth_texture@texdepth,Fail +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index,Fail +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays,Fail +spec@arb_es2_compatibility@fbo-blending-formats,Fail +spec@arb_es2_compatibility@fbo-blending-formats@GL_RGB565,Fail +spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-integer,Fail +spec@arb_fragment_program@fdo38145,Fail +spec@arb_fragment_program@fp-abs-01,Fail +spec@arb_fragment_program_shadow@masked,Fail +spec@arb_fragment_program_shadow@tex-shadow1d,Fail +spec@arb_fragment_program_shadow@tex-shadow2d,Fail +spec@arb_fragment_program_shadow@tex-shadow2drect,Fail +spec@arb_fragment_program_shadow@txp-shadow1d,Fail +spec@arb_fragment_program_shadow@txp-shadow2d,Fail +spec@arb_fragment_program_shadow@txp-shadow2drect,Fail +spec@arb_framebuffer_srgb@arb_framebuffer_srgb-srgb_conformance,Fail +spec@arb_framebuffer_srgb@framebuffer-srgb,Fail +spec@arb_occlusion_query@occlusion_query_conform,Fail +spec@arb_occlusion_query@occlusion_query_conform@GetObjivAval_multi2,Fail +spec@arb_pixel_buffer_object@cubemap npot pbo,Fail +spec@arb_pixel_buffer_object@cubemap pbo,Fail +spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail +spec@arb_pixel_buffer_object@pbo-getteximage,Fail +spec@arb_pixel_buffer_object@texsubimage-unpack pbo,Fail +spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail +spec@arb_point_sprite@arb_point_sprite-mipmap,Fail +spec@arb_provoking_vertex@arb-provoking-vertex-clipped-geometry-flatshading,Fail +spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail +spec@arb_sampler_objects@gl_ext_texture_srgb_decode,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail +spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-01,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-02,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-03,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-04,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-05,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-07,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-08,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-cumulative,Fail +spec@arb_texture_cube_map@copyteximage cube,Fail +spec@arb_texture_cube_map@copyteximage cube samples=2,Fail +spec@arb_texture_cube_map@copyteximage cube samples=4,Fail +spec@arb_texture_cube_map@cubemap,Fail +spec@arb_texture_cube_map@cubemap npot,Fail +spec@arb_texture_cube_map@cubemap-shader,Fail +spec@arb_texture_rectangle@1-1-linear-texture,Fail +spec@arb_texture_rectangle@copyteximage rect samples=2,Fail +spec@arb_texture_rectangle@copyteximage rect samples=4,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-01,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-02,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-03,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-04,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-05,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-07,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect-08,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drect,Fail +spec@arb_texture_rectangle@glsl-fs-shadow2drectproj,Fail +spec@arb_texture_rg@execution@fs-shadow2d-red-01,Fail +spec@arb_texture_rg@execution@fs-shadow2d-red-02,Fail +spec@arb_texture_rg@execution@fs-shadow2d-red-03,Fail +spec@arb_texture_rg@fbo-blending-formats,Fail +spec@arb_texture_rg@fbo-blending-formats@GL_R16,Fail +spec@arb_texture_rg@fbo-blending-formats@GL_R8,Fail +spec@arb_texture_rg@fbo-blending-formats@GL_RG16,Fail +spec@arb_texture_rg@fbo-blending-formats@GL_RG8,Fail +spec@arb_texture_rg@fbo-blending-formats@GL_RG,Fail +spec@arb_texture_rg@texwrap formats bordercolor,Fail +spec@arb_texture_rg@texwrap formats bordercolor@GL_RG8- border color only,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail +spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG8- swizzled- border color only,Fail +spec@arb_texture_storage@texture-storage@cube array texture,Fail +spec@arb_vertex_program@arl,Fail +spec@arb_vertex_program@big-param,Fail +spec@arb_vertex_program@clip-plane-transformation arb,Fail +spec@arb_vertex_program@instructions@arl,Fail +spec@arb_vertex_program@vp-address-01,Fail +spec@arb_vertex_program@vp-arl-constant-array,Fail +spec@arb_vertex_program@vp-arl-constant-array-huge,Fail +spec@arb_vertex_program@vp-arl-constant-array-huge-offset,Fail +spec@arb_vertex_program@vp-arl-constant-array-huge-offset-neg,Fail +spec@arb_vertex_program@vp-arl-constant-array-huge-relative-offset,Fail +spec@arb_vertex_program@vp-arl-constant-array-huge-varying,Fail +spec@arb_vertex_program@vp-arl-constant-array-varying,Fail +spec@arb_vertex_program@vp-arl-env-array,Fail +spec@arb_vertex_program@vp-arl-local-array,Fail +spec@arb_vertex_program@vp-arl-neg-array-2,Fail +spec@arb_vertex_program@vp-arl-neg-array,Fail +spec@ati_fragment_shader@ati_fragment_shader-render-default,Fail +spec@ati_fragment_shader@ati_fragment_shader-render-notexture,Fail +spec@ati_fragment_shader@ati_fragment_shader-render-sources,Fail +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail +spec@egl_khr_surfaceless_context@viewport,Fail +spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail +spec@ext_framebuffer_multisample@dlist,Fail +spec@ext_framebuffer_multisample@renderbuffer-samples,Fail +spec@ext_framebuffer_multisample@samples,Fail +spec@ext_framebuffer_object@ext_framebuffer_object-error-handling,Fail +spec@ext_framebuffer_object@fbo-blending-formats@3,Fail +spec@ext_framebuffer_object@fbo-blending-formats,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_R3_G3_B2,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB10,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB12,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB16,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB5,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB8,Fail +spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB,Fail +spec@ext_framebuffer_object@fbo-cubemap,Fail +spec@ext_framebuffer_object@fbo-depth-sample-compare,Fail +spec@ext_framebuffer_object@fbo-maxsize,Fail +spec@ext_framebuffer_object@fbo-scissor-bitmap,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-copypixels,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-copypixels,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-copypixels,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-copypixels,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail +spec@ext_packed_depth_stencil@depth_stencil texture,Fail +spec@ext_packed_depth_stencil@fbo-blit-d24s8,Fail +spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-blit,Fail +spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-copypixels,Fail +spec@ext_provoking_vertex@provoking-vertex,Fail +spec@ext_texture_format_bgra8888@api-errors,Fail +spec@ext_texture_lod_bias@lodbias,Fail +spec@ext_texture_srgb@tex-srgb,Fail +spec@ext_texture_srgb@texwrap formats bordercolor,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8_ALPHA8- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- NPOT,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- swizzled,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8- NPOT,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8- swizzled,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor@GL_COMPRESSED_SRGB_ALPHA- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc bordercolor@GL_COMPRESSED_SRGB- border color only,Fail +spec@ext_texture_srgb@texwrap formats-s3tc,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA- NPOT,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA- swizzled,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB- NPOT,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB- swizzled,Fail +spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB,Fail +spec@glsl-1.10@execution@built-in-functions@fs-atan-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-atan-vec2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-atan-vec3-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-atan-vec4-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-degrees-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-dot-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-dot-vec4-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-fract-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-inversesqrt-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-length-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log2-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-mix-float-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-mod-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-int-int,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec2-ivec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec3-ivec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec4-int,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec4-ivec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-mat3-mat3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-mat4-mat4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-vec3-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-assign-mult-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-int,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-ivec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-ivec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec2-ivec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec3-ivec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec4-int,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec4-ivec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-mat3-mat3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-mat4-mat4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-div-vec3-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-op-mult-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-pow-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-radians-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-sin-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-smoothstep-float-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-tan-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec2-ivec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec3-ivec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec4-ivec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec2-ivec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec3-ivec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec4-ivec4,Fail +spec@glsl-1.10@execution@clipping@clip-plane-transformation clipvert_pos,Fail +spec@glsl-1.10@execution@clipping@clip-plane-transformation fixed,Fail +spec@glsl-1.10@execution@clipping@clip-plane-transformation pos_clipvert,Fail +spec@glsl-1.10@execution@derivatives@glsl-derivs-abs-sign,Fail +spec@glsl-1.10@execution@derivatives@glsl-derivs-swizzle,Fail +spec@glsl-1.10@execution@glsl-vs-large-uniform-array,Fail +spec@glsl-1.10@execution@glsl-vs-uniform-array-4,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-smooth-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-fixed,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-none,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-smooth-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-other-flat-vertex,Fail +spec@glsl-1.10@execution@interpolation@interpolation-none-other-smooth-vertex,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-01,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-02,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-03,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-04,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-05,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-07,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-08,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-bias,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1dproj-bias,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow1dproj,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-01,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-02,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-03,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-04,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-05,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-07,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-08,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-bias,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj-bias,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj,Fail +spec@glsl-1.10@execution@temp-array-indexing@glsl-vs-giant-temp-array,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-col-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-col-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-row-wr,Fail +spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-wr,Fail +spec@glsl-1.10@glsl-fs-discard-only,Fail +spec@glsl-1.10@linker@glsl-link-varyings-3,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat2x4-mat2x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat3x2-mat3x2,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat3x4-mat3x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat4x3-mat4x3,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat2x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat3x2,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat3x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat4x3,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat2x4-mat2x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x2-mat3x2,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x4-mat3x4,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat4x3-mat4x3,Fail +spec@glsl-1.20@execution@built-in-functions@fs-op-mult-mat4x3-mat3x4,Fail +spec@glsl-1.20@execution@clipping@fixed-clip-enables,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-reject,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-enables,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-equal-to-position,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-homogeneity,Fail +spec@glsl-1.20@execution@clipping@vs-clip-vertex-primitives,Fail +spec@glsl-1.20@execution@fs-underflow-mul-compare-zero,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-col-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-wr,Fail +spec@intel_performance_query@intel_performance_query-issue_2235,Fail +spec@khr_texture_compression_astc@basic-gles,Fail +spec@khr_texture_compression_astc@miptree-gles srgb,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@oes_point_sprite@arb_point_sprite-checkerboard_gles1,Fail +spec@!opengl 1.0@gl-1.0-dlist-bitmap,Fail +spec@!opengl 1.0@gl-1.0-dlist-materials,Fail +spec@!opengl 1.0@gl-1.0-dlist-shademodel,Fail +spec@!opengl 1.0@gl-1.0-drawbuffer-modes,Fail +spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail +spec@!opengl 1.0@gl-1.0-edgeflag,Fail +spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_AND,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_AND_INVERTED,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_AND_REVERSE,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_CLEAR,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_COPY_INVERTED,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_EQUIV,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_INVERT,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_NAND,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_NOOP,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_NOR,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_OR,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_OR_INVERTED,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_OR_REVERSE,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_SET,Fail +spec@!opengl 1.0@gl-1.0-logicop@GL_XOR,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.0@gl-1.0-ortho-pos,Fail +spec@!opengl 1.0@gl-1.0-rastercolor,Fail +spec@!opengl 1.0@gl-1.0-scissor-bitmap,Fail +spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail +spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail +spec@!opengl 1.1@gl-1.1-xor-copypixels,Fail +spec@!opengl 1.1@gl-1.1-xor,Fail +spec@!opengl 1.1@line-flat-clip-color,Fail +spec@!opengl 1.1@linestipple@Factor 2x,Fail +spec@!opengl 1.1@linestipple@Factor 3x,Fail +spec@!opengl 1.1@linestipple,Fail +spec@!opengl 1.1@linestipple@Line loop,Fail +spec@!opengl 1.1@linestipple@Line strip,Fail +spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@teximage-scale-bias,Fail +spec@!opengl 1.1@texsubimage,Fail +spec@!opengl 1.1@texsubimage-unpack,Fail +spec@!opengl 1.1@user-clip,Fail +spec@!opengl 1.3@gl-1.3-texture-env,Fail +spec@!opengl 1.4@copy-pixels,Fail +spec@!opengl 1.4@gl-1.4-polygon-offset,Fail +spec@!opengl 1.5@depth-tex-compare,Fail +spec@!opengl 2.0@early-z,Fail +spec@!opengl 2.0@gl-2.0-edgeflag,Fail +spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail +spec@!opengl 2.0@vertex-program-two-side back front2 back2,Fail +spec@!opengl 2.0@vertex-program-two-side back front2 back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side back front2,Fail +spec@!opengl 2.0@vertex-program-two-side back front2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back front2 back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back front2 back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back front2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back front2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled back@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front back back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front back back2@vs and fs,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front front2 back2,Fail +spec@!opengl 2.0@vertex-program-two-side enabled front front2 back2@vs and fs,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_bitmap,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@!opengl es 2.0@glsl-fs-pointcoord,Fail + +# see https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/730 +# and https://gitlab.freedesktop.org/mesa/mesa/-/issues/7208 +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail +spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail + +# Expects that some format/internal_format combinations should be supported when they currently aren't. +spec@arb_clear_texture@arb_clear_texture-supported-formats,Fail + +# Precision issue when lowering GL_RGB16 and GL_RGBA16 +spec@arb_clear_texture@arb_clear_texture-sized-formats,Fail + +spec@!opengl 1.1@line-smooth-stipple,Fail + +# bookworm update +spec@ext_framebuffer_multisample@renderbufferstorage-samples,Fail + +# remove this after https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/843 +# is merged and piglit is updated +spec@glsl-1.10@execution@glsl-1.10-built-in-uniform-state,Fail + +x11-dEQP-EGL.functional.wide_color.pbuffer_888_colorspace_srgb,Fail +x11-dEQP-EGL.functional.wide_color.window_888_colorspace_srgb,Fail diff --git a/src/gallium/drivers/lima/ci/lima-flakes.txt b/src/gallium/drivers/lima/ci/lima-flakes.txt new file mode 100644 index 00000000000..403fc9c63cb --- /dev/null +++ b/src/gallium/drivers/lima/ci/lima-flakes.txt @@ -0,0 +1,4 @@ +# dEQP error: terminate called after throwing an instance of 'tcu::TestError' +# dEQP error: what(): Runtime check failed: '!m_requiresRestart' at teglGLES2SharingThreadedTests.cpp:2271 +x11-dEQP-EGL.functional.sharing.gles2.multithread.random.programs.link.19 +wayland-dEQP-EGL.functional.sharing.gles2.multithread.random.programs.link.19 diff --git a/src/gallium/drivers/lima/ci/lima-skips.txt b/src/gallium/drivers/lima/ci/lima-skips.txt new file mode 100644 index 00000000000..497abcb8ef0 --- /dev/null +++ b/src/gallium/drivers/lima/ci/lima-skips.txt @@ -0,0 +1,91 @@ +# Note: skips lists for CI are just a list of lines that, when +# non-zero-length and not starting with '#', will regex match to +# delete lines from the test list. Be careful. + +# deqp-egl skips +# slow +dEQP-EGL.functional.multicontext + +# piglit skips +gles3 +glsl-1.3 +glsl-1.4 +glsl-1.5 +glsl-3 +glsl-4 +glsl-es-3 +opengl 3 +opengl 4 +opengl es 3 +glx@ + +# dmesg-fail +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count +spec@!opengl 1.4@triangle-rasterization-overdraw + +# oom +spec@!opengl 1.1@streaming-texture-leak +spec@!opengl 1.2@tex3d-maxsize + +# Allocates >1GB CPU memory and more GPU, and ooms. +ppgtt_memory_alignment + +# slow +shaders@glsl-predication-on-large-array +spec@glsl-1.10@execution@temp-array-indexing@glsl-fs-giant-temp-array + +# timeout +shaders@glsl-uniform-interstage-limits@subdivide 5 +shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks + +# very large list of fails +spec@!opengl 1.1@clipflat + +# large lists of skips with "Failed to create waffle_context for OpenGL [34].x" errors +spec@amd_shader_trinary_minmax +spec@arb_bindless_texture +spec@arb_compute_shader +spec@arb_compute_variable_group_size +spec@arb_direct_state_access +spec@arb_draw_indirect +spec@arb_enhanced_layouts +spec@arb_es3_compatibility +spec@arb_explicit_uniform_location +spec@arb_geometry_shader4 +spec@arb_gl_spirv +spec@arb_gpu_shader5 +spec@arb_gpu_shader_fp64 +spec@arb_gpu_shader_int64 +spec@arb_pipeline_statistics_query +spec@arb_program_interface_query +spec@arb_sample_shading +spec@arb_shader_atomic_counters +spec@arb_shader_draw_parameters +spec@arb_shader_image_load_store +spec@arb_shader_precision +spec@arb_shader_storage_buffer_object +spec@arb_shader_texture_image_samples +spec@arb_sparse_buffer +spec@arb_stencil_texturing +spec@arb_tessellation_shader +spec@arb_texture_barrier +spec@arb_texture_buffer_object +spec@arb_texture_cube_map_array +spec@arb_texture_gather +spec@arb_texture_stencil8 +spec@arb_texture_view +spec@arb_transform_feedback3 +spec@arb_uniform_buffer_object +spec@arb_vertex_attrib_64bit +spec@arb_viewport_array +spec@ext_shader_samples_identical +spec@ext_texture_array +spec@ext_transform_feedback +spec@intel_conservative_rasterization +spec@intel_shader_integer_functions2 +spec@nv_alpha_to_coverage_dither_control +spec@nv_compute_shader_derivatives +spec@nv_shader_atomic_int64 diff --git a/src/gallium/drivers/lima/drm-shim/meson.build b/src/gallium/drivers/lima/drm-shim/meson.build index a978d3505ba..db1806064bb 100644 --- a/src/gallium/drivers/lima/drm-shim/meson.build +++ b/src/gallium/drivers/lima/drm-shim/meson.build @@ -20,7 +20,7 @@ # IN THE SOFTWARE. liblima_noop_drm_shim = shared_library( - ['lima_noop_drm_shim'], + 'lima_noop_drm_shim', 'lima_noop.c', include_directories: [inc_include, inc_src], dependencies: dep_drm_shim, diff --git a/src/gallium/drivers/lima/ir/gp/codegen.c b/src/gallium/drivers/lima/ir/gp/codegen.c index d9a46f86a90..aa0a0496b06 100644 --- a/src/gallium/drivers/lima/ir/gp/codegen.c +++ b/src/gallium/drivers/lima/ir/gp/codegen.c @@ -608,7 +608,7 @@ bool gpir_codegen_prog(gpir_compiler *comp) if (lima_debug & LIMA_DEBUG_GP) { gpir_codegen_print_prog(comp); - gpir_disassemble_program(code, num_instr); + gpir_disassemble_program(code, num_instr, stdout); } return true; diff --git a/src/gallium/drivers/lima/ir/gp/codegen.h b/src/gallium/drivers/lima/ir/gp/codegen.h index d24b31b41f7..f6bf4eb1923 100644 --- a/src/gallium/drivers/lima/ir/gp/codegen.h +++ b/src/gallium/drivers/lima/ir/gp/codegen.h @@ -161,6 +161,6 @@ typedef struct __attribute__((__packed__)) { unsigned branch_target : 8; } gpir_codegen_instr; -void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr); +void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr, FILE *fp); #endif diff --git a/src/gallium/drivers/lima/ir/gp/disasm.c b/src/gallium/drivers/lima/ir/gp/disasm.c index bc0ce3bec4d..eb15fdb5e1a 100644 --- a/src/gallium/drivers/lima/ir/gp/disasm.c +++ b/src/gallium/drivers/lima/ir/gp/disasm.c @@ -47,9 +47,9 @@ static const gpir_codegen_store_src gp_unit_to_store_src[num_units] = { }; static void -print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index) +print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index, FILE *fp) { - printf("^%u", cur_dest_index + unit); + fprintf(fp, "^%u", cur_dest_index + unit); gpir_codegen_store_src src = gp_unit_to_store_src[unit]; @@ -59,54 +59,54 @@ print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index) /* Temporary stores ignore the address, and always use whatever's * stored in address register 0. */ - printf("/t[addr0]"); + fprintf(fp, "/t[addr0]"); } else { if (instr->store0_varying) - printf("/v"); + fprintf(fp, "/v"); else - printf("/$"); - printf("%u", instr->store0_addr); + fprintf(fp, "/$"); + fprintf(fp, "%u", instr->store0_addr); } - printf("."); + fprintf(fp, "."); if (instr->store0_src_x == src) - printf("x"); + fprintf(fp, "x"); if (instr->store0_src_y == src) - printf("y"); + fprintf(fp, "y"); } if (instr->store1_src_z == src || instr->store1_src_w == src) { if (instr->store1_temporary) { - printf("/t[addr0]"); + fprintf(fp, "/t[addr0]"); } else { if (instr->store1_varying) - printf("/v"); + fprintf(fp, "/v"); else - printf("/$"); - printf("%u", instr->store1_addr); + fprintf(fp, "/$"); + fprintf(fp, "%u", instr->store1_addr); } - printf("."); + fprintf(fp, "."); if (instr->store1_src_z == src) - printf("z"); + fprintf(fp, "z"); if (instr->store1_src_w == src) - printf("w"); + fprintf(fp, "w"); } if (unit == unit_complex) { switch (instr->complex_op) { case gpir_codegen_complex_op_temp_store_addr: - printf("/addr0"); + fprintf(fp, "/addr0"); break; case gpir_codegen_complex_op_temp_load_addr_0: - printf("/addr1"); + fprintf(fp, "/addr1"); break; case gpir_codegen_complex_op_temp_load_addr_1: - printf("/addr2"); + fprintf(fp, "/addr2"); break; case gpir_codegen_complex_op_temp_load_addr_2: - printf("/addr3"); + fprintf(fp, "/addr3"); break; default: break; @@ -117,14 +117,14 @@ print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index) static void print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned cur_dest_index) + unsigned cur_dest_index, FILE *fp) { switch (src) { case gpir_codegen_src_attrib_x: case gpir_codegen_src_attrib_y: case gpir_codegen_src_attrib_z: case gpir_codegen_src_attrib_w: - printf("%c%d.%c", instr->register0_attribute ? 'a' : '$', + fprintf(fp, "%c%d.%c", instr->register0_attribute ? 'a' : '$', instr->register0_addr, "xyzw"[src - gpir_codegen_src_attrib_x]); break; @@ -132,7 +132,7 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, case gpir_codegen_src_register_y: case gpir_codegen_src_register_z: case gpir_codegen_src_register_w: - printf("$%d.%c", instr->register1_addr, + fprintf(fp, "$%d.%c", instr->register1_addr, "xyzw"[src - gpir_codegen_src_register_x]); break; @@ -140,54 +140,54 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, case gpir_codegen_src_unknown_1: case gpir_codegen_src_unknown_2: case gpir_codegen_src_unknown_3: - printf("unknown%d", src - gpir_codegen_src_unknown_0); + fprintf(fp, "unknown%d", src - gpir_codegen_src_unknown_0); break; case gpir_codegen_src_load_x: case gpir_codegen_src_load_y: case gpir_codegen_src_load_z: case gpir_codegen_src_load_w: - printf("t[%d", instr->load_addr); + fprintf(fp, "t[%d", instr->load_addr); switch (instr->load_offset) { case gpir_codegen_load_off_ld_addr_0: - printf("+addr1"); + fprintf(fp, "+addr1"); break; case gpir_codegen_load_off_ld_addr_1: - printf("+addr2"); + fprintf(fp, "+addr2"); break; case gpir_codegen_load_off_ld_addr_2: - printf("+addr3"); + fprintf(fp, "+addr3"); break; case gpir_codegen_load_off_none: break; default: - printf("+unk%d", instr->load_offset); + fprintf(fp, "+unk%d", instr->load_offset); } - printf("].%c", "xyzw"[src - gpir_codegen_src_load_x]); + fprintf(fp, "].%c", "xyzw"[src - gpir_codegen_src_load_x]); break; case gpir_codegen_src_p1_acc_0: - printf("^%d", cur_dest_index - 1 * num_units + unit_acc_0); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_acc_0); break; case gpir_codegen_src_p1_acc_1: - printf("^%d", cur_dest_index - 1 * num_units + unit_acc_1); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_acc_1); break; case gpir_codegen_src_p1_mul_0: - printf("^%d", cur_dest_index - 1 * num_units + unit_mul_0); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_mul_0); break; case gpir_codegen_src_p1_mul_1: - printf("^%d", cur_dest_index - 1 * num_units + unit_mul_1); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_mul_1); break; case gpir_codegen_src_p1_pass: - printf("^%d", cur_dest_index - 1 * num_units + unit_pass); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_pass); break; case gpir_codegen_src_unused: - printf("unused"); + fprintf(fp, "unused"); break; case gpir_codegen_src_p1_complex: /* Also ident */ @@ -195,48 +195,48 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, case unit_acc_0: case unit_acc_1: if (unit_src_num == 1) { - printf("0"); + fprintf(fp, "0"); return; } break; case unit_mul_0: case unit_mul_1: if (unit_src_num == 1) { - printf("1"); + fprintf(fp, "1"); return; } break; default: break; } - printf("^%d", cur_dest_index - 1 * num_units + unit_complex); + fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_complex); break; case gpir_codegen_src_p2_pass: - printf("^%d", cur_dest_index - 2 * num_units + unit_pass); + fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_pass); break; case gpir_codegen_src_p2_acc_0: - printf("^%d", cur_dest_index - 2 * num_units + unit_acc_0); + fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_acc_0); break; case gpir_codegen_src_p2_acc_1: - printf("^%d", cur_dest_index - 2 * num_units + unit_acc_1); + fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_acc_1); break; case gpir_codegen_src_p2_mul_0: - printf("^%d", cur_dest_index - 2 * num_units + unit_mul_0); + fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_mul_0); break; case gpir_codegen_src_p2_mul_1: - printf("^%d", cur_dest_index - 2 * num_units + unit_mul_1); + fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_mul_1); break; case gpir_codegen_src_p1_attrib_x: case gpir_codegen_src_p1_attrib_y: case gpir_codegen_src_p1_attrib_z: case gpir_codegen_src_p1_attrib_w: - printf("%c%d.%c", prev_instr->register0_attribute ? 'a' : '$', + fprintf(fp, "%c%d.%c", prev_instr->register0_attribute ? 'a' : '$', prev_instr->register0_addr, "xyzw"[src - gpir_codegen_src_p1_attrib_x]); break; @@ -245,7 +245,7 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num, static bool print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned cur_dest_index) + unsigned cur_dest_index, FILE *fp) { bool printed = false; @@ -255,113 +255,113 @@ print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, if (instr->mul0_src0 != gpir_codegen_src_unused && instr->mul0_src1 != gpir_codegen_src_unused) { printed = true; - printf("\t"); + fprintf(fp, "\t"); if (instr->mul0_src1 == gpir_codegen_src_ident && !instr->mul0_neg) { - printf("mov.m0 "); - print_dest(instr, unit_mul_0, cur_dest_index); - printf(" "); + fprintf(fp, "mov.m0 "); + print_dest(instr, unit_mul_0, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } else { if (instr->mul_op == gpir_codegen_mul_op_complex2) - printf("complex2.m0 "); + fprintf(fp, "complex2.m0 "); else - printf("mul.m0 "); + fprintf(fp, "mul.m0 "); - print_dest(instr, unit_mul_0, cur_dest_index); - printf(" "); + print_dest(instr, unit_mul_0, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); if (instr->mul0_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } - printf("\n"); + fprintf(fp, "\n"); } if (instr->mul1_src0 != gpir_codegen_src_unused && instr->mul1_src1 != gpir_codegen_src_unused) { printed = true; - printf("\t"); + fprintf(fp, "\t"); if (instr->mul1_src1 == gpir_codegen_src_ident && !instr->mul1_neg) { - printf("mov.m1 "); - print_dest(instr, unit_mul_1, cur_dest_index); - printf(" "); + fprintf(fp, "mov.m1 "); + print_dest(instr, unit_mul_1, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } else { - printf("mul.m1 "); - print_dest(instr, unit_mul_1, cur_dest_index); - printf(" "); + fprintf(fp, "mul.m1 "); + print_dest(instr, unit_mul_1, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); if (instr->mul1_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->mul1_src1, unit_mul_0, 1, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } - printf("\n"); + fprintf(fp, "\n"); } break; case gpir_codegen_mul_op_complex1: printed = true; - printf("\tcomplex1.m01 "); - print_dest(instr, unit_mul_0, cur_dest_index); - printf(" "); + fprintf(fp, "\tcomplex1.m01 "); + print_dest(instr, unit_mul_0, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr, - cur_dest_index); - printf("\n"); + cur_dest_index, fp); + fprintf(fp, "\n"); break; case gpir_codegen_mul_op_select: printed = true; - printf("\tsel.m01 "); - print_dest(instr, unit_mul_0, cur_dest_index); - printf(" "); + fprintf(fp, "\tsel.m01 "); + print_dest(instr, unit_mul_0, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, - cur_dest_index); - printf("\n"); + cur_dest_index, fp); + fprintf(fp, "\n"); break; default: printed = true; - printf("\tunknown%u.m01 ", instr->mul_op); - print_dest(instr, unit_mul_0, cur_dest_index); - printf(" "); + fprintf(fp, "\tunknown%u.m01 ", instr->mul_op); + print_dest(instr, unit_mul_0, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr, - cur_dest_index); - printf("\n"); + cur_dest_index, fp); + fprintf(fp, "\n"); break; } @@ -393,14 +393,14 @@ static const acc_op_info acc_op_infos[8] = { static bool print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned cur_dest_index) + unsigned cur_dest_index, FILE *fp) { bool printed = false; const acc_op_info op = acc_op_infos[instr->acc_op]; if (instr->acc0_src0 != gpir_codegen_src_unused) { printed = true; - printf("\t"); + fprintf(fp, "\t"); acc_op_info acc0_op = op; if (instr->acc0_src1 == gpir_codegen_src_ident && instr->acc0_src1_neg) { @@ -410,30 +410,30 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, } if (acc0_op.name) - printf("%s.a0 ", acc0_op.name); + fprintf(fp, "%s.a0 ", acc0_op.name); else - printf("op%u.a0 ", instr->acc_op); + fprintf(fp, "op%u.a0 ", instr->acc_op); - print_dest(instr, unit_acc_0, cur_dest_index); - printf(" "); + print_dest(instr, unit_acc_0, cur_dest_index, fp); + fprintf(fp, " "); if (instr->acc0_src0_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->acc0_src0, unit_acc_0, 0, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); if (acc0_op.srcs > 1) { - printf(" "); + fprintf(fp, " "); if (instr->acc0_src1_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->acc0_src1, unit_acc_0, 1, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } - printf("\n"); + fprintf(fp, "\n"); } if (instr->acc1_src0 != gpir_codegen_src_unused) { printed = true; - printf("\t"); + fprintf(fp, "\t"); acc_op_info acc1_op = op; if (instr->acc1_src1 == gpir_codegen_src_ident && instr->acc1_src1_neg) { @@ -443,25 +443,25 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, } if (acc1_op.name) - printf("%s.a1 ", acc1_op.name); + fprintf(fp, "%s.a1 ", acc1_op.name); else - printf("op%u.a1 ", instr->acc_op); + fprintf(fp, "op%u.a1 ", instr->acc_op); - print_dest(instr, unit_acc_1, cur_dest_index); - printf(" "); + print_dest(instr, unit_acc_1, cur_dest_index, fp); + fprintf(fp, " "); if (instr->acc1_src0_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->acc1_src0, unit_acc_1, 0, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); if (acc1_op.srcs > 1) { - printf(" "); + fprintf(fp, " "); if (instr->acc1_src1_neg) - printf("-"); + fprintf(fp, "-"); print_src(instr->acc1_src1, unit_acc_1, 1, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } - printf("\n"); + fprintf(fp, "\n"); } return printed; @@ -469,131 +469,129 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, static bool print_pass(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned cur_dest_index) + unsigned cur_dest_index, FILE *fp) { if (instr->pass_src == gpir_codegen_src_unused) return false; - printf("\t"); + fprintf(fp, "\t"); switch (instr->pass_op) { case gpir_codegen_pass_op_pass: - printf("mov.p "); + fprintf(fp, "mov.p "); break; case gpir_codegen_pass_op_preexp2: - printf("preexp2.p "); + fprintf(fp, "preexp2.p "); break; case gpir_codegen_pass_op_postlog2: - printf("postlog2.p "); + fprintf(fp, "postlog2.p "); break; case gpir_codegen_pass_op_clamp: - printf("clamp.p "); + fprintf(fp, "clamp.p "); break; default: - printf("unk%u.p ", instr->pass_op); + fprintf(fp, "unk%u.p ", instr->pass_op); } - print_dest(instr, unit_pass, cur_dest_index); - printf(" "); + print_dest(instr, unit_pass, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->pass_src, unit_pass, 0, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); if (instr->pass_op == gpir_codegen_pass_op_clamp) { - printf(" "); + fprintf(fp, " "); print_src(gpir_codegen_src_load_x, unit_pass, 1, instr, prev_instr, - cur_dest_index); - printf(" "); + cur_dest_index, fp); + fprintf(fp, " "); print_src(gpir_codegen_src_load_y, unit_pass, 2, instr, prev_instr, - cur_dest_index); + cur_dest_index, fp); } - printf("\n"); + fprintf(fp, "\n"); return true; } static bool print_complex(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned cur_dest_index) + unsigned cur_dest_index, FILE *fp) { if (instr->complex_src == gpir_codegen_src_unused) return false; - printf("\t"); + fprintf(fp, "\t"); switch (instr->complex_op) { case gpir_codegen_complex_op_nop: return false; case gpir_codegen_complex_op_exp2: - printf("exp2.c "); + fprintf(fp, "exp2.c "); break; case gpir_codegen_complex_op_log2: - printf("log2.c "); + fprintf(fp, "log2.c "); break; case gpir_codegen_complex_op_rsqrt: - printf("rsqrt.c "); + fprintf(fp, "rsqrt.c "); break; case gpir_codegen_complex_op_rcp: - printf("rcp.c "); + fprintf(fp, "rcp.c "); break; case gpir_codegen_complex_op_pass: case gpir_codegen_complex_op_temp_store_addr: case gpir_codegen_complex_op_temp_load_addr_0: case gpir_codegen_complex_op_temp_load_addr_1: case gpir_codegen_complex_op_temp_load_addr_2: - printf("mov.c "); + fprintf(fp, "mov.c "); break; default: - printf("unk%u.c ", instr->complex_op); + fprintf(fp, "unk%u.c ", instr->complex_op); } - print_dest(instr, unit_complex, cur_dest_index); - printf(" "); + print_dest(instr, unit_complex, cur_dest_index, fp); + fprintf(fp, " "); print_src(instr->complex_src, unit_complex, 0, instr, prev_instr, - cur_dest_index); - printf("\n"); + cur_dest_index, fp); + fprintf(fp, "\n"); return true; } static void print_instr(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr, - unsigned instr_number, unsigned cur_dest_index) + unsigned instr_number, unsigned cur_dest_index, FILE *fp) { bool printed = false; - printf("%03d:", instr_number); - printed |= print_acc(instr, prev_instr, cur_dest_index); - printed |= print_mul(instr, prev_instr, cur_dest_index); - printed |= print_complex(instr, prev_instr, cur_dest_index); - printed |= print_pass(instr, prev_instr, cur_dest_index); + fprintf(fp, "%03d:", instr_number); + printed |= print_acc(instr, prev_instr, cur_dest_index, fp); + printed |= print_mul(instr, prev_instr, cur_dest_index, fp); + printed |= print_complex(instr, prev_instr, cur_dest_index, fp); + printed |= print_pass(instr, prev_instr, cur_dest_index, fp); if (instr->branch) { printed = true; /* The branch condition is taken from the current pass unit result */ - printf("\tbranch ^%d %03d\n", cur_dest_index + unit_pass, + fprintf(fp, "\tbranch ^%d %03d\n", cur_dest_index + unit_pass, instr->branch_target + (instr->branch_target_lo ? 0 : 0x100)); } if (instr->unknown_1 != 0) { printed = true; - printf("\tunknown_1 %u\n", instr->unknown_1); + fprintf(fp, "\tunknown_1 %u\n", instr->unknown_1); } if (!printed) - printf("\tnop\n"); + fprintf(fp, "\tnop\n"); } void -gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr) +gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr, FILE *fp) { - printf("=======disassembly:=======\n"); - unsigned cur_dest_index = 0; unsigned cur_instr = 0; for (gpir_codegen_instr *instr = code; cur_instr < num_instr; instr++, cur_instr++, cur_dest_index += num_units) { - print_instr(instr, instr - 1, cur_instr, cur_dest_index); + print_instr(instr, instr - 1, cur_instr, cur_dest_index, fp); } } diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h index 63f74caa0ed..7065633aafe 100644 --- a/src/gallium/drivers/lima/ir/gp/gpir.h +++ b/src/gallium/drivers/lima/ir/gp/gpir.h @@ -32,6 +32,7 @@ /* list of operations that a node can do. */ typedef enum { + gpir_op_unsupported = 0, gpir_op_mov, /* mul ops */ @@ -397,15 +398,9 @@ typedef struct gpir_compiler { /* Find the gpir node for a given NIR SSA def. */ gpir_node **node_for_ssa; - /* Find the gpir node for a given NIR register. */ - gpir_node **node_for_reg; - /* Find the gpir register for a given NIR SSA def. */ gpir_reg **reg_for_ssa; - /* Find the gpir register for a given NIR register. */ - gpir_reg **reg_for_reg; - /* gpir block for NIR block. */ gpir_block **blocks; diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c index 4b1479a68fc..4b02e60a8fc 100644 --- a/src/gallium/drivers/lima/ir/gp/nir.c +++ b/src/gallium/drivers/lima/ir/gp/nir.c @@ -38,18 +38,12 @@ gpir_reg *gpir_create_reg(gpir_compiler *comp) return reg; } -static gpir_reg *reg_for_nir_reg(gpir_compiler *comp, nir_register *nir_reg) -{ - unsigned index = nir_reg->index; - gpir_reg *reg = comp->reg_for_reg[index]; - if (reg) - return reg; - reg = gpir_create_reg(comp); - comp->reg_for_reg[index] = reg; - return reg; -} - -static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *ssa) +/* Register the given gpir_node as providing the given NIR destination, so + * that gpir_node_find() will return it. Also insert any stores necessary if + * the destination will be used after the end of this basic block. The node + * must already be inserted. + */ +static void register_node_ssa(gpir_block *block, gpir_node *node, nir_def *ssa) { block->comp->node_for_ssa[ssa->index] = node; snprintf(node->name, sizeof(node->name), "ssa%d", ssa->index); @@ -59,7 +53,7 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s */ bool needs_register = false; nir_foreach_use(use, ssa) { - if (use->parent_instr->block != ssa->parent_instr->block) { + if (nir_src_parent_instr(use)->block != ssa->parent_instr->block) { needs_register = true; break; } @@ -67,7 +61,7 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s if (!needs_register) { nir_foreach_if_use(use, ssa) { - if (nir_cf_node_prev(&use->parent_if->cf_node) != + if (nir_cf_node_prev(&nir_src_parent_if(use)->cf_node) != &ssa->parent_instr->block->cf_node) { needs_register = true; break; @@ -85,56 +79,36 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s } } -static void register_node_reg(gpir_block *block, gpir_node *node, nir_reg_dest *nir_reg) +static void register_node_reg(gpir_block *block, gpir_node *node, int index) { - block->comp->node_for_reg[nir_reg->reg->index] = node; + block->comp->node_for_ssa[index] = node; gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg); - snprintf(node->name, sizeof(node->name), "reg%d", nir_reg->reg->index); + snprintf(store->node.name, sizeof(node->name), "reg%d", index); store->child = node; - store->reg = reg_for_nir_reg(block->comp, nir_reg->reg); + store->reg = block->comp->reg_for_ssa[index]; gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT); list_addtail(&store->node.list, &block->node_list); } -/* Register the given gpir_node as providing the given NIR destination, so - * that gpir_node_find() will return it. Also insert any stores necessary if - * the destination will be used after the end of this basic block. The node - * must already be inserted. - */ -static void register_node(gpir_block *block, gpir_node *node, nir_dest *dest) -{ - if (dest->is_ssa) - register_node_ssa(block, node, &dest->ssa); - else - register_node_reg(block, node, &dest->reg); -} - static gpir_node *gpir_node_find(gpir_block *block, nir_src *src, int channel) { gpir_reg *reg = NULL; gpir_node *pred = NULL; - if (src->is_ssa) { - if (src->ssa->num_components > 1) { - for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) { - if (block->comp->vector_ssa[i].ssa == src->ssa->index) { - return block->comp->vector_ssa[i].nodes[channel]; - } + if (src->ssa->num_components > 1) { + for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) { + if (block->comp->vector_ssa[i].ssa == src->ssa->index) { + return block->comp->vector_ssa[i].nodes[channel]; } - } else { - gpir_node *pred = block->comp->node_for_ssa[src->ssa->index]; - if (pred->block == block) - return pred; - reg = block->comp->reg_for_ssa[src->ssa->index]; } } else { - pred = block->comp->node_for_reg[src->reg.reg->index]; + gpir_node *pred = block->comp->node_for_ssa[src->ssa->index]; if (pred && pred->block == block) return pred; - reg = reg_for_nir_reg(block->comp, src->reg.reg); + reg = block->comp->reg_for_ssa[src->ssa->index]; } assert(reg); @@ -147,9 +121,6 @@ static gpir_node *gpir_node_find(gpir_block *block, nir_src *src, } static int nir_to_gpir_opcodes[nir_num_opcodes] = { - /* not supported */ - [0 ... nir_last_opcode] = -1, - [nir_op_fmul] = gpir_op_mul, [nir_op_fadd] = gpir_op_add, [nir_op_fneg] = gpir_op_neg, @@ -182,13 +153,13 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni) if (instr->op == nir_op_mov) { gpir_node *child = gpir_node_find(block, &instr->src[0].src, instr->src[0].swizzle[0]); - register_node(block, child, &instr->dest.dest); + register_node_ssa(block, child, &instr->def); return true; } int op = nir_to_gpir_opcodes[instr->op]; - if (op < 0) { + if (op == gpir_op_unsupported) { gpir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); return false; } @@ -203,7 +174,6 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni) for (int i = 0; i < num_child; i++) { nir_alu_src *src = instr->src + i; - node->children_negate[i] = src->negate; gpir_node *child = gpir_node_find(block, &src->src, src->swizzle[0]); node->children[i] = child; @@ -212,12 +182,12 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni) } list_addtail(&node->node.list, &block->node_list); - register_node(block, &node->node, &instr->dest.dest); + register_node_ssa(block, &node->node, &instr->def); return true; } -static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest, +static gpir_node *gpir_create_load(gpir_block *block, nir_def *def, int op, int index, int component) { gpir_load_node *load = gpir_node_create(block, op); @@ -227,25 +197,24 @@ static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest, load->index = index; load->component = component; list_addtail(&load->node.list, &block->node_list); - register_node(block, &load->node, dest); + register_node_ssa(block, &load->node, def); return &load->node; } -static bool gpir_create_vector_load(gpir_block *block, nir_dest *dest, int index) +static bool gpir_create_vector_load(gpir_block *block, nir_def *def, int index) { - assert(dest->is_ssa); assert(index < GPIR_VECTOR_SSA_NUM); - block->comp->vector_ssa[index].ssa = dest->ssa.index; + block->comp->vector_ssa[index].ssa = def->index; - for (int i = 0; i < dest->ssa.num_components; i++) { - gpir_node *node = gpir_create_load(block, dest, gpir_op_load_uniform, + for (int i = 0; i < def->num_components; i++) { + gpir_node *node = gpir_create_load(block, def, gpir_op_load_uniform, block->comp->constant_base + index, i); if (!node) return false; block->comp->vector_ssa[index].nodes[i] = node; - snprintf(node->name, sizeof(node->name), "ssa%d.%c", dest->ssa.index, "xyzw"[i]); + snprintf(node->name, sizeof(node->name), "ssa%d.%c", def->index, "xyzw"[i]); } return true; @@ -256,24 +225,49 @@ static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni) nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); switch (instr->intrinsic) { + case nir_intrinsic_decl_reg: + { + gpir_reg *reg = gpir_create_reg(block->comp); + block->comp->reg_for_ssa[instr->def.index] = reg; + return true; + } + case nir_intrinsic_load_reg: + { + gpir_node *node = gpir_node_find(block, &instr->src[0], 0); + assert(node); + block->comp->node_for_ssa[instr->def.index] = node; + return true; + } + case nir_intrinsic_store_reg: + { + gpir_node *child = gpir_node_find(block, &instr->src[0], 0); + assert(child); + register_node_reg(block, child, instr->src[1].ssa->index); + return true; + } case nir_intrinsic_load_input: - return gpir_create_load(block, &instr->dest, + return gpir_create_load(block, &instr->def, gpir_op_load_attribute, nir_intrinsic_base(instr), nir_intrinsic_component(instr)) != NULL; case nir_intrinsic_load_uniform: { int offset = nir_intrinsic_base(instr); + + if (!nir_src_is_const(instr->src[0])) { + gpir_error("indirect indexing for uniforms is not implemented\n"); + return false; + } offset += (int)nir_src_as_float(instr->src[0]); - return gpir_create_load(block, &instr->dest, + return gpir_create_load(block, &instr->def, gpir_op_load_uniform, offset / 4, offset % 4) != NULL; } case nir_intrinsic_load_viewport_scale: - return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_SCALE); + return gpir_create_vector_load(block, &instr->def, GPIR_VECTOR_SSA_VIEWPORT_SCALE); case nir_intrinsic_load_viewport_offset: - return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_OFFSET); + return gpir_create_vector_load(block, &instr->def, GPIR_VECTOR_SSA_VIEWPORT_OFFSET); case nir_intrinsic_store_output: { gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying); @@ -315,7 +309,7 @@ static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni) static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni) { - gpir_error("nir_ssa_undef_instr is not supported\n"); + gpir_error("nir_undef_instr is not supported\n"); return false; } @@ -335,7 +329,7 @@ static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = [nir_instr_type_alu] = gpir_emit_alu, [nir_instr_type_intrinsic] = gpir_emit_intrinsic, [nir_instr_type_load_const] = gpir_emit_load_const, - [nir_instr_type_ssa_undef] = gpir_emit_ssa_undef, + [nir_instr_type_undef] = gpir_emit_ssa_undef, [nir_instr_type_tex] = gpir_emit_tex, [nir_instr_type_jump] = gpir_emit_jump, }; @@ -401,7 +395,7 @@ static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *impl) return true; } -static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) +static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_ssa) { gpir_compiler *comp = rzalloc(prog, gpir_compiler); @@ -412,9 +406,7 @@ static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigne comp->vector_ssa[i].ssa = -1; comp->node_for_ssa = rzalloc_array(comp, gpir_node *, num_ssa); - comp->node_for_reg = rzalloc_array(comp, gpir_node *, num_reg); comp->reg_for_ssa = rzalloc_array(comp, gpir_reg *, num_ssa); - comp->reg_for_reg = rzalloc_array(comp, gpir_reg *, num_reg); comp->prog = prog; return comp; } @@ -427,7 +419,7 @@ static int gpir_glsl_type_size(enum glsl_base_type type) } static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp, - struct pipe_debug_callback *debug) + struct util_debug_callback *debug) { const struct shader_info *info = &nir->info; char *shaderdb; @@ -443,15 +435,15 @@ static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp, if (lima_debug & LIMA_DEBUG_SHADERDB) fprintf(stderr, "SHADER-DB: %s\n", shaderdb); - pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb); + util_debug_message(debug, SHADER_INFO, "%s", shaderdb); free(shaderdb); } bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir, - struct pipe_debug_callback *debug) + struct util_debug_callback *debug) { nir_function_impl *func = nir_shader_get_entrypoint(nir); - gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); + gpir_compiler *comp = gpir_compiler_create(prog, func->ssa_alloc); if (!comp) return false; diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c index ef534e7e286..9372d72a8bf 100644 --- a/src/gallium/drivers/lima/ir/gp/node.c +++ b/src/gallium/drivers/lima/ir/gp/node.c @@ -28,6 +28,9 @@ #include "gpir.h" const gpir_op_info gpir_op_infos[] = { + [gpir_op_unsupported] = { + .name = "unsupported", + }, [gpir_op_mov] = { .name = "mov", .slots = (int []) { diff --git a/src/gallium/drivers/lima/ir/gp/optimize.c b/src/gallium/drivers/lima/ir/gp/optimize.c index c95faec9c6d..e5896b7d41e 100644 --- a/src/gallium/drivers/lima/ir/gp/optimize.c +++ b/src/gallium/drivers/lima/ir/gp/optimize.c @@ -80,7 +80,7 @@ optimize_branches(gpir_compiler *comp) if (block->list.prev == &comp->block_list) continue; - gpir_block *prev_block = LIST_ENTRY(gpir_block, block->list.prev, list); + gpir_block *prev_block = list_entry(block->list.prev, gpir_block, list); if (list_is_empty(&prev_block->node_list)) continue; @@ -109,7 +109,7 @@ optimize_branches(gpir_compiler *comp) /* Delete the branch */ list_del(&node->list); - block->successors[0] = LIST_ENTRY(gpir_block, block->list.next, list); + block->successors[0] = list_entry(block->list.next, gpir_block, list); } } diff --git a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c index 47cc6109e01..bcfe6fd89b0 100644 --- a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c +++ b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c @@ -31,6 +31,13 @@ * Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons */ +static int cmp_float(const void *a, const void *b) +{ + const float *fa = (const float *) a; + const float *fb = (const float *) b; + return (*fa > *fb) - (*fa < *fb); +} + static void schedule_calc_sched_info(gpir_node *node) { int n = 0; @@ -68,15 +75,7 @@ static void schedule_calc_sched_info(gpir_node *node) } /* sort */ - for (i = 0; i < n - 1; i++) { - for (int j = 0; j < n - i - 1; j++) { - if (reg[j] > reg[j + 1]) { - float tmp = reg[j + 1]; - reg[j + 1] = reg[j]; - reg[j] = tmp; - } - } - } + qsort(reg, n, sizeof(reg[0]), cmp_float); for (i = 0; i < n; i++) { float pressure = reg[i] + n - (i + 1); diff --git a/src/gallium/drivers/lima/ir/gp/regalloc.c b/src/gallium/drivers/lima/ir/gp/regalloc.c index 8526d1e9e7d..eaab3e68182 100644 --- a/src/gallium/drivers/lima/ir/gp/regalloc.c +++ b/src/gallium/drivers/lima/ir/gp/regalloc.c @@ -507,6 +507,11 @@ static void handle_reg_write(gpir_store_node *store, static void handle_value_write(gpir_node *node, struct value_regalloc_ctx *ctx) { + /* TODO: why does an uninitialized node->value_reg + * sometimes end up here? */ + if (node->value_reg < 0) + return; + ctx->last_written[node->value_reg] = node; ctx->live[node->value_reg] = NULL; } diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c index 78128bd891e..c4700dce34d 100644 --- a/src/gallium/drivers/lima/ir/gp/scheduler.c +++ b/src/gallium/drivers/lima/ir/gp/scheduler.c @@ -1297,9 +1297,9 @@ static bool try_node(sched_ctx *ctx) * the list at all. We know better here, so we have to open-code * list_for_each_entry() without the check in order to not assert. */ - for (gpir_node *node = LIST_ENTRY(gpir_node, ctx->ready_list.next, list); + for (gpir_node *node = list_entry(ctx->ready_list.next, gpir_node, list); &node->list != &ctx->ready_list; - node = LIST_ENTRY(gpir_node, node->list.next, list)) { + node = list_entry(node->list.next, gpir_node, list)) { if (best_score != INT_MIN) { if (node->sched.dist < best_node->sched.dist) break; diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h index 41d363a5550..706804fb8d5 100644 --- a/src/gallium/drivers/lima/ir/lima_ir.h +++ b/src/gallium/drivers/lima/ir/lima_ir.h @@ -54,13 +54,13 @@ struct lima_fs_compiled_shader; /* gpir interface */ bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir, - struct pipe_debug_callback *debug); + struct util_debug_callback *debug); /* ppir interface */ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir, struct ra_regs *ra, - struct pipe_debug_callback *debug); + struct util_debug_callback *debug); struct ra_regs *ppir_regalloc_init(void *mem_ctx); void lima_nir_lower_uniform_to_scalar(nir_shader *shader); @@ -73,4 +73,6 @@ void lima_nir_duplicate_load_consts(nir_shader *shader); void lima_nir_duplicate_load_inputs(nir_shader *shader); void lima_nir_duplicate_load_uniforms(nir_shader *shader); +bool lima_nir_lower_txp(nir_shader *shader); + #endif diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c index 0eeab6b6ad0..efe59fd9b50 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c +++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c @@ -34,9 +34,9 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load) nir_foreach_use_safe(use_src, &load->def) { nir_load_const_instr *dupl; - if (last_parent_instr != use_src->parent_instr) { + if (last_parent_instr != nir_src_parent_instr(use_src)) { /* if ssa use, clone for the target block */ - b->cursor = nir_before_instr(use_src->parent_instr); + b->cursor = nir_before_instr(nir_src_parent_instr(use_src)); dupl = nir_load_const_instr_create(b->shader, load->def.num_components, load->def.bit_size); @@ -49,18 +49,19 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load) dupl = last_dupl; } - nir_instr_rewrite_src(use_src->parent_instr, use_src, nir_src_for_ssa(&dupl->def)); - last_parent_instr = use_src->parent_instr; + nir_src_rewrite(use_src, &dupl->def); + last_parent_instr = nir_src_parent_instr(use_src); last_dupl = dupl; } last_dupl = NULL; - last_parent_instr = NULL; + nir_if *last_parent_if = NULL; nir_foreach_if_use_safe(use_src, &load->def) { nir_load_const_instr *dupl; + nir_if *nif = nir_src_parent_if(use_src); - if (last_parent_instr != use_src->parent_instr) { + if (last_parent_if != nif) { /* if 'if use', clone where it is */ b->cursor = nir_before_instr(&load->instr); @@ -75,8 +76,8 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load) dupl = last_dupl; } - nir_if_rewrite_condition(use_src->parent_if, nir_src_for_ssa(&dupl->def)); - last_parent_instr = use_src->parent_instr; + nir_src_rewrite(&nir_src_parent_if(use_src)->condition, &dupl->def); + last_parent_if = nif; last_dupl = dupl; } @@ -87,8 +88,7 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load) static void lima_nir_duplicate_load_consts_impl(nir_shader *shader, nir_function_impl *impl) { - nir_builder builder; - nir_builder_init(&builder, impl); + nir_builder builder = nir_builder_create(impl); nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { @@ -118,9 +118,7 @@ lima_nir_duplicate_load_consts_impl(nir_shader *shader, nir_function_impl *impl) void lima_nir_duplicate_load_consts(nir_shader *shader) { - nir_foreach_function(function, shader) { - if (function->impl) { - lima_nir_duplicate_load_consts_impl(shader, function->impl); - } + nir_foreach_function_impl(impl, shader) { + lima_nir_duplicate_load_consts_impl(shader, impl); } } diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c index ecff28e525e..358352b3245 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c +++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c @@ -32,23 +32,19 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr, nir_intrinsic_instr *last_dupl = NULL; nir_instr *last_parent_instr = NULL; - nir_foreach_use_safe(use_src, &itr->dest.ssa) { + nir_foreach_use_safe(use_src, &itr->def) { nir_intrinsic_instr *dupl; - if (last_parent_instr != use_src->parent_instr) { + if (last_parent_instr != nir_src_parent_instr(use_src)) { /* if ssa use, clone for the target block */ - b->cursor = nir_before_instr(use_src->parent_instr); + b->cursor = nir_before_instr(nir_src_parent_instr(use_src)); dupl = nir_intrinsic_instr_create(b->shader, op); dupl->num_components = itr->num_components; memcpy(dupl->const_index, itr->const_index, sizeof(itr->const_index)); - dupl->src[0].is_ssa = itr->src[0].is_ssa; - if (itr->src[0].is_ssa) - dupl->src[0].ssa = itr->src[0].ssa; - else - dupl->src[0].reg = itr->src[0].reg; + dupl->src[0].ssa = itr->src[0].ssa; - nir_ssa_dest_init(&dupl->instr, &dupl->dest, - dupl->num_components, itr->dest.ssa.bit_size, NULL); + nir_def_init(&dupl->instr, &dupl->def, dupl->num_components, + itr->def.bit_size); dupl->instr.pass_flags = 1; nir_builder_instr_insert(b, &dupl->instr); @@ -57,31 +53,28 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr, dupl = last_dupl; } - nir_instr_rewrite_src(use_src->parent_instr, use_src, nir_src_for_ssa(&dupl->dest.ssa)); - last_parent_instr = use_src->parent_instr; + nir_src_rewrite(use_src, &dupl->def); + last_parent_instr = nir_src_parent_instr(use_src); last_dupl = dupl; } last_dupl = NULL; - last_parent_instr = NULL; + nir_if *last_parent_if = NULL; - nir_foreach_if_use_safe(use_src, &itr->dest.ssa) { + nir_foreach_if_use_safe(use_src, &itr->def) { nir_intrinsic_instr *dupl; + nir_if *nif = nir_src_parent_if(use_src); - if (last_parent_instr != use_src->parent_instr) { + if (last_parent_if != nif) { /* if 'if use', clone where it is */ b->cursor = nir_before_instr(&itr->instr); dupl = nir_intrinsic_instr_create(b->shader, op); dupl->num_components = itr->num_components; memcpy(dupl->const_index, itr->const_index, sizeof(itr->const_index)); - dupl->src[0].is_ssa = itr->src[0].is_ssa; - if (itr->src[0].is_ssa) - dupl->src[0].ssa = itr->src[0].ssa; - else - dupl->src[0].reg = itr->src[0].reg; + dupl->src[0].ssa = itr->src[0].ssa; - nir_ssa_dest_init(&dupl->instr, &dupl->dest, - dupl->num_components, itr->dest.ssa.bit_size, NULL); + nir_def_init(&dupl->instr, &dupl->def, dupl->num_components, + itr->def.bit_size); dupl->instr.pass_flags = 1; nir_builder_instr_insert(b, &dupl->instr); @@ -90,8 +83,8 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr, dupl = last_dupl; } - nir_if_rewrite_condition(use_src->parent_if, nir_src_for_ssa(&dupl->dest.ssa)); - last_parent_instr = use_src->parent_instr; + nir_src_rewrite(&nir_src_parent_if(use_src)->condition, &dupl->def); + last_parent_if = nif; last_dupl = dupl; } @@ -103,8 +96,7 @@ static void lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl, nir_intrinsic_op op) { - nir_builder builder; - nir_builder_init(&builder, impl); + nir_builder builder = nir_builder_create(impl); nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { @@ -123,9 +115,6 @@ lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl, if (itr->instr.pass_flags) continue; - if (!itr->dest.is_ssa) - continue; - lima_nir_duplicate_intrinsic(&builder, itr, op); } } @@ -140,10 +129,8 @@ lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl, void lima_nir_duplicate_load_uniforms(nir_shader *shader) { - nir_foreach_function(function, shader) { - if (function->impl) { - lima_nir_duplicate_intrinsic_impl(shader, function->impl, nir_intrinsic_load_uniform); - } + nir_foreach_function_impl(impl, shader) { + lima_nir_duplicate_intrinsic_impl(shader, impl, nir_intrinsic_load_uniform); } } @@ -153,9 +140,7 @@ lima_nir_duplicate_load_uniforms(nir_shader *shader) void lima_nir_duplicate_load_inputs(nir_shader *shader) { - nir_foreach_function(function, shader) { - if (function->impl) { - lima_nir_duplicate_intrinsic_impl(shader, function->impl, nir_intrinsic_load_input); - } + nir_foreach_function_impl(impl, shader) { + lima_nir_duplicate_intrinsic_impl(shader, impl, nir_intrinsic_load_input); } } diff --git a/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c b/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c new file mode 100644 index 00000000000..8ee6a4b3528 --- /dev/null +++ b/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2021 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "lima_ir.h" + +static nir_def * +get_proj_index(nir_instr *coord_instr, nir_instr *proj_instr, + int coord_components, int *proj_idx) +{ + *proj_idx = -1; + if (coord_instr->type != nir_instr_type_alu || + proj_instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *coord_alu = nir_instr_as_alu(coord_instr); + nir_alu_instr *proj_alu = nir_instr_as_alu(proj_instr); + + if (coord_alu->op != nir_op_mov || + proj_alu->op != nir_op_mov) + return NULL; + + nir_def *coord_src_ssa = coord_alu->src[0].src.ssa; + nir_def *proj_src_ssa = proj_alu->src[0].src.ssa; + + if (coord_src_ssa != proj_src_ssa) + return NULL; + + if (coord_src_ssa->parent_instr->type != nir_instr_type_intrinsic) + return NULL; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(coord_src_ssa->parent_instr); + if (intrin->intrinsic != nir_intrinsic_load_input) + return NULL; + + if (intrin->def.num_components != 4) + return NULL; + + /* Coords must be in .xyz */ + for (int i = 0; i < coord_components; i++) { + if (coord_alu->src[0].swizzle[i] != i) + return NULL; + } + + *proj_idx = proj_alu->src[0].swizzle[0]; + + return coord_src_ssa; +} + +static bool +lima_nir_lower_txp_instr(nir_builder *b, nir_instr *instr, + UNUSED void *cb_data) +{ + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + int proj_idx = nir_tex_instr_src_index(tex, nir_tex_src_projector); + int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); + + if (proj_idx < 0) + return false; + + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_3D: + break; + default: + return false; + } + + b->cursor = nir_before_instr(&tex->instr); + + /* Merge coords and projector into single backend-specific source. + * It's easy if texture2DProj argument is vec3, it's more tricky with + * vec4 since NIR just drops Z component that we need, so we have to + * step back and use load_input SSA instead of mov as a source for + * newly constructed vec4 + */ + nir_def *proj_ssa = tex->src[proj_idx].src.ssa; + nir_def *coords_ssa = tex->src[coords_idx].src.ssa; + + int proj_idx_in_vec = -1; + nir_def *load_input = get_proj_index(coords_ssa->parent_instr, + proj_ssa->parent_instr, + tex->coord_components, + &proj_idx_in_vec); + nir_def *combined; + if (load_input && proj_idx_in_vec == 3) { + unsigned xyzw[] = { 0, 1, 2, 3 }; + combined = nir_swizzle(b, load_input, xyzw, 4); + tex->coord_components = 4; + } else if (load_input && proj_idx_in_vec == 2) { + unsigned xyz[] = { 0, 1, 2 }; + combined = nir_swizzle(b, load_input, xyz, 3); + tex->coord_components = 3; + } else { + switch (tex->coord_components) { + default: + case 1: + /* We still need vec3 for 1D textures, so duplicate coordinate */ + combined = nir_vec3(b, + nir_channel(b, coords_ssa, 0), + nir_channel(b, coords_ssa, 0), + nir_channel(b, proj_ssa, 0)); + tex->coord_components = 3; + break; + case 2: + combined = nir_vec3(b, + nir_channel(b, coords_ssa, 0), + nir_channel(b, coords_ssa, 1), + nir_channel(b, proj_ssa, 0)); + tex->coord_components = 3; + break; + case 3: + combined = nir_vec4(b, + nir_channel(b, coords_ssa, 0), + nir_channel(b, coords_ssa, 1), + nir_channel(b, coords_ssa, 2), + nir_channel(b, proj_ssa, 0)); + tex->coord_components = 4; + } + } + + nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_coord)); + nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_projector)); + nir_tex_instr_add_src(tex, nir_tex_src_backend1, combined); + + return true; +} + +bool +lima_nir_lower_txp(nir_shader *shader) +{ + return nir_shader_instructions_pass(shader, lima_nir_lower_txp_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); +} diff --git a/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c index f9d593f0417..4067746b574 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c +++ b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c @@ -31,12 +31,12 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) { b->cursor = nir_before_instr(&intr->instr); - nir_ssa_def *loads[4]; + nir_def *loads[4]; for (unsigned i = 0; i < intr->num_components; i++) { nir_intrinsic_instr *chan_intr = nir_intrinsic_instr_create(b->shader, intr->intrinsic); - nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest, - 1, intr->dest.ssa.bit_size, NULL); + nir_def_init(&chan_intr->instr, &chan_intr->def, 1, + intr->def.bit_size); chan_intr->num_components = 1; nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr) * 4 + i); @@ -48,10 +48,10 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) nir_builder_instr_insert(b, &chan_intr->instr); - loads[i] = &chan_intr->dest.ssa; + loads[i] = &chan_intr->def; } - nir_ssa_def_rewrite_uses(&intr->dest.ssa, + nir_def_rewrite_uses(&intr->def, nir_vec(b, loads, intr->num_components)); nir_instr_remove(&intr->instr); } @@ -59,23 +59,20 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr) void lima_nir_lower_uniform_to_scalar(nir_shader *shader) { - nir_foreach_function(function, shader) { - if (function->impl) { - nir_builder b; - nir_builder_init(&b, function->impl); + nir_foreach_function_impl(impl, shader) { + nir_builder b = nir_builder_create(impl); - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_uniform) - continue; + if (intr->intrinsic != nir_intrinsic_load_uniform) + continue; - lower_load_uniform_to_scalar(&b, intr); - } + lower_load_uniform_to_scalar(&b, intr); } } } diff --git a/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c b/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c index dbdb3a81c64..c061c88cdad 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c +++ b/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c @@ -27,86 +27,63 @@ #include "lima_ir.h" static bool -lima_nir_split_load_input_block(nir_block *block, nir_builder *b) +lima_nir_split_load_input_instr(nir_builder *b, + nir_instr *instr, + UNUSED void *cb_data) { - bool progress = false; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_alu) - continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - if (alu->op != nir_op_mov) - continue; - - if (!alu->dest.dest.is_ssa) - continue; - - if (!alu->src[0].src.is_ssa) - continue; - - nir_ssa_def *ssa = alu->src[0].src.ssa; - if (ssa->parent_instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ssa->parent_instr); - if (intrin->intrinsic != nir_intrinsic_load_input) - continue; - - uint8_t swizzle = alu->src[0].swizzle[0]; - int i; - - for (i = 1; i < nir_dest_num_components(alu->dest.dest); i++) - if (alu->src[0].swizzle[i] != (swizzle + i)) - break; - - if (i != nir_dest_num_components(alu->dest.dest)) - continue; - - /* mali4xx can't access unaligned vec3, don't split load input */ - if (nir_dest_num_components(alu->dest.dest) == 3 && swizzle > 0) - continue; - - b->cursor = nir_before_instr(&intrin->instr); - nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create( - b->shader, - intrin->intrinsic); - nir_ssa_dest_init(&new_intrin->instr, &new_intrin->dest, - nir_dest_num_components(alu->dest.dest), - ssa->bit_size, - NULL); - new_intrin->num_components = nir_dest_num_components(alu->dest.dest); - nir_intrinsic_set_base(new_intrin, nir_intrinsic_base(intrin)); - nir_intrinsic_set_component(new_intrin, nir_intrinsic_component(intrin) + swizzle); - nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin)); - - /* offset */ - nir_src_copy(&new_intrin->src[0], &intrin->src[0]); - - nir_builder_instr_insert(b, &new_intrin->instr); - nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, - &new_intrin->dest.ssa); - nir_instr_remove(&alu->instr); - progress = true; - } - - return progress; -} - -static bool -lima_nir_split_load_input_impl(nir_function_impl *impl) -{ - bool progress = false; - nir_builder builder; - nir_builder_init(&builder, impl); - - nir_foreach_block(block, impl) { - progress |= lima_nir_split_load_input_block(block, &builder); - } - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - return progress; + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op != nir_op_mov) + return false; + + nir_def *ssa = alu->src[0].src.ssa; + if (ssa->parent_instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ssa->parent_instr); + if (intrin->intrinsic != nir_intrinsic_load_input) + return false; + + uint8_t swizzle = alu->src[0].swizzle[0]; + int i; + + for (i = 1; i < alu->def.num_components; i++) + if (alu->src[0].swizzle[i] != (swizzle + i)) + break; + + if (i != alu->def.num_components) + return false; + + /* mali4xx can't access unaligned vec3, don't split load input */ + if (alu->def.num_components == 3 && swizzle > 0) + return false; + + /* mali4xx can't access unaligned vec2, don't split load input */ + if (alu->def.num_components == 2 && + swizzle != 0 && swizzle != 2) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create( + b->shader, + intrin->intrinsic); + nir_def_init(&new_intrin->instr, &new_intrin->def, + alu->def.num_components, ssa->bit_size); + new_intrin->num_components = alu->def.num_components; + nir_intrinsic_set_base(new_intrin, nir_intrinsic_base(intrin)); + nir_intrinsic_set_component(new_intrin, nir_intrinsic_component(intrin) + swizzle); + nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin)); + + /* offset */ + new_intrin->src[0] = nir_src_for_ssa(intrin->src[0].ssa); + + nir_builder_instr_insert(b, &new_intrin->instr); + nir_def_rewrite_uses(&alu->def, + &new_intrin->def); + nir_instr_remove(&alu->instr); + return true; } /* Replaces a single load of several packed varyings and number of movs with @@ -115,13 +92,8 @@ lima_nir_split_load_input_impl(nir_function_impl *impl) bool lima_nir_split_load_input(nir_shader *shader) { - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress |= lima_nir_split_load_input_impl(function->impl); - } - - return progress; + return nir_shader_instructions_pass(shader, lima_nir_split_load_input_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); } - diff --git a/src/gallium/drivers/lima/ir/lima_nir_split_loads.c b/src/gallium/drivers/lima/ir/lima_nir_split_loads.c index 75707280c7b..fb6e4451753 100644 --- a/src/gallium/drivers/lima/ir/lima_nir_split_loads.c +++ b/src/gallium/drivers/lima/ir/lima_nir_split_loads.c @@ -36,59 +36,45 @@ * down but won't split it. */ -static nir_ssa_def * +static nir_def * clone_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) { nir_intrinsic_instr *new_intrin = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr)); - assert(new_intrin->dest.is_ssa); - - unsigned num_srcs = nir_intrinsic_infos[new_intrin->intrinsic].num_srcs; - for (unsigned i = 0; i < num_srcs; i++) { - assert(new_intrin->src[i].is_ssa); - } - nir_builder_instr_insert(b, &new_intrin->instr); - return &new_intrin->dest.ssa; + return &new_intrin->def; } static bool replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) { - if (!intrin->dest.is_ssa) - return false; - if (intrin->intrinsic != nir_intrinsic_load_input && intrin->intrinsic != nir_intrinsic_load_uniform) return false; - if (!intrin->src[0].is_ssa) - return false; - if (intrin->src[0].ssa->parent_instr->type == nir_instr_type_load_const) return false; struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL); - nir_foreach_use_safe(src, &intrin->dest.ssa) { + nir_foreach_use_safe(src, &intrin->def) { struct hash_entry *entry = - _mesa_hash_table_search(visited_instrs, src->parent_instr); - if (entry && (src->parent_instr->type != nir_instr_type_phi)) { - nir_ssa_def *def = entry->data; - nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def)); + _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src)); + if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) { + nir_def *def = entry->data; + nir_src_rewrite(src, def); continue; } - b->cursor = nir_before_src(src, false); - nir_ssa_def *new = clone_intrinsic(b, intrin); - nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new)); - _mesa_hash_table_insert(visited_instrs, src->parent_instr, new); + b->cursor = nir_before_src(src); + nir_def *new = clone_intrinsic(b, intrin); + nir_src_rewrite(src, new); + _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new); } - nir_foreach_if_use_safe(src, &intrin->dest.ssa) { - b->cursor = nir_before_src(src, true); - nir_if_rewrite_condition(src->parent_if, - nir_src_for_ssa(clone_intrinsic(b, intrin))); + nir_foreach_if_use_safe(src, &intrin->def) { + b->cursor = nir_before_src(src); + nir_src_rewrite(&nir_src_parent_if(src)->condition, clone_intrinsic(b, intrin)); } nir_instr_remove(&intrin->instr); @@ -103,18 +89,18 @@ replace_load_const(nir_builder *b, nir_load_const_instr *load_const) nir_foreach_use_safe(src, &load_const->def) { struct hash_entry *entry = - _mesa_hash_table_search(visited_instrs, src->parent_instr); - if (entry && (src->parent_instr->type != nir_instr_type_phi)) { - nir_ssa_def *def = entry->data; - nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def)); + _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src)); + if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) { + nir_def *def = entry->data; + nir_src_rewrite(src, def); continue; } - b->cursor = nir_before_src(src, false); - nir_ssa_def *new = nir_build_imm(b, load_const->def.num_components, + b->cursor = nir_before_src(src); + nir_def *new = nir_build_imm(b, load_const->def.num_components, load_const->def.bit_size, load_const->value); - nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new)); - _mesa_hash_table_insert(visited_instrs, src->parent_instr, new); + nir_src_rewrite(src, new); + _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new); } nir_instr_remove(&load_const->instr); @@ -126,19 +112,16 @@ lima_nir_split_loads(nir_shader *shader) { bool progress = false; - nir_foreach_function(function, shader) { - if (function->impl) { - nir_builder b; - nir_builder_init(&b, function->impl); - - nir_foreach_block_reverse(block, function->impl) { - nir_foreach_instr_reverse_safe(instr, block) { - if (instr->type == nir_instr_type_load_const) { - replace_load_const(&b, nir_instr_as_load_const(instr)); - progress = true; - } else if (instr->type == nir_instr_type_intrinsic) { - progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr)); - } + nir_foreach_function_impl(impl, shader) { + nir_builder b = nir_builder_create(impl); + + nir_foreach_block_reverse(block, impl) { + nir_foreach_instr_reverse_safe(instr, block) { + if (instr->type == nir_instr_type_load_const) { + replace_load_const(&b, nir_instr_as_load_const(instr)); + progress = true; + } else if (instr->type == nir_instr_type_intrinsic) { + progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr)); } } } diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c index 47ceb183677..b043bd46bd8 100644 --- a/src/gallium/drivers/lima/ir/pp/codegen.c +++ b/src/gallium/drivers/lima/ir/pp/codegen.c @@ -91,8 +91,20 @@ static void ppir_codegen_encode_varying(ppir_node *node, void *code) f->imm.perspective = 1; break; case ppir_op_load_coords: - /* num_components == 3 implies cubemap as we don't support 3D textures */ - f->imm.source_type = num_components == 3 ? 2 : 0; + if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + f->imm.source_type = 2; + + switch (load->perspective) { + case ppir_perspective_none: + f->imm.perspective = 0; + break; + case ppir_perspective_z: + f->imm.perspective = 2; + break; + case ppir_perspective_w: + f->imm.perspective = 3; + break; + } break; default: break; @@ -103,12 +115,22 @@ static void ppir_codegen_encode_varying(ppir_node *node, void *code) f->reg.mask = dest->write_mask << (index & 0x3); if (load->num_src) { - /* num_components == 3 implies cubemap as we don't support 3D textures */ - if (num_components == 3) { + if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { f->reg.source_type = 2; f->reg.perspective = 1; } else { f->reg.source_type = 1; + switch (load->perspective) { + case ppir_perspective_none: + f->reg.perspective = 0; + break; + case ppir_perspective_z: + f->reg.perspective = 2; + break; + case ppir_perspective_w: + f->reg.perspective = 3; + break; + } } ppir_src *src = &load->src; index = ppir_target_get_src_reg_index(src); @@ -134,9 +156,10 @@ static void ppir_codegen_encode_texld(ppir_node *node, void *code) switch (ldtex->sampler_dim) { case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_RECT: case GLSL_SAMPLER_DIM_EXTERNAL: - f->type = ppir_codegen_sampler_type_2d; + f->type = ppir_codegen_sampler_type_generic; break; case GLSL_SAMPLER_DIM_CUBE: f->type = ppir_codegen_sampler_type_cube; @@ -175,6 +198,22 @@ static void ppir_codegen_encode_uniform(ppir_node *node, void *code) } } +static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod) +{ + switch (outmod) { + case ppir_outmod_none: + return ppir_codegen_outmod_none; + case ppir_outmod_clamp_fraction: + return ppir_codegen_outmod_clamp_fraction; + case ppir_outmod_clamp_positive: + return ppir_codegen_outmod_clamp_positive; + case ppir_outmod_round: + return ppir_codegen_outmod_round; + default: + unreachable("invalid ppir_outmod"); + } +} + static unsigned shift_to_op(int shift) { assert(shift >= -3 && shift <= 3); @@ -194,7 +233,7 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code) f->dest = index >> 2; f->mask = dest->write_mask << dest_shift; } - f->dest_modifier = dest->modifier; + f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); switch (node->op) { case ppir_op_mul: @@ -267,7 +306,7 @@ static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code) f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; f->output_en = true; } - f->dest_modifier = dest->modifier; + f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); switch (node->op) { case ppir_op_mul: @@ -333,7 +372,7 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code) int dest_shift = index & 0x3; f->dest = index >> 2; f->mask = dest->write_mask << dest_shift; - f->dest_modifier = dest->modifier; + f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); switch (node->op) { case ppir_op_add: @@ -423,7 +462,7 @@ static void ppir_codegen_encode_scl_add(ppir_node *node, void *code) f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; f->output_en = true; - f->dest_modifier = dest->modifier; + f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); switch (node->op) { case ppir_op_add: @@ -509,7 +548,7 @@ static void ppir_codegen_encode_combine(ppir_node *node, void *code) int dest_component = ffs(dest->write_mask) - 1; assert(dest_component >= 0); f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component; - f->scalar.dest_modifier = dest->modifier; + f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier); ppir_src *src = alu->src; f->scalar.arg0_src = get_scl_reg_index(src, dest_component); @@ -620,7 +659,7 @@ static void ppir_codegen_encode_branch(ppir_node *node, void *code) while (list_is_empty(&target->instr_list)) { if (!target->list.next) break; - target = LIST_ENTRY(ppir_block, target->list.next, list); + target = list_entry(target->list.next, ppir_block, list); } assert(!list_is_empty(&target->instr_list)); @@ -674,13 +713,13 @@ static int get_instr_encode_size(ppir_instr *instr) static void bitcopy(void *dst, int dst_offset, void *src, int src_size) { - int off1 = dst_offset & 0x1f; - uint32_t *cpy_dst = dst, *cpy_src = src; + unsigned char *cpy_dst = dst, *cpy_src = src; + int off1 = dst_offset & 0x07; - cpy_dst += (dst_offset >> 5); + cpy_dst += (dst_offset >> 3); if (off1) { - int off2 = 32 - off1; + int off2 = 0x08 - off1; int cpy_size = 0; while (1) { *cpy_dst |= *cpy_src << off1; @@ -750,7 +789,7 @@ static int encode_instr(ppir_instr *instr, void *code, void *last_code) size = align_to_word(size) + 1; ctrl->count = size; - if (instr->is_end) + if (instr->stop) ctrl->stop = true; if (last_code) { @@ -778,7 +817,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp) printf("%08x ", prog[i]); } printf("\n"); - ppir_disassemble_instr(prog, offset); + ppir_disassemble_instr(prog, offset, stdout); prog += n; offset += n; } @@ -795,6 +834,11 @@ bool ppir_codegen_prog(ppir_compiler *comp) instr->encode_size = get_instr_encode_size(instr); size += instr->encode_size; } + /* Set stop flag for the last instruction if block has stop flag */ + if (block->stop) { + ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list); + instr->stop = true; + } } uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t)); diff --git a/src/gallium/drivers/lima/ir/pp/codegen.h b/src/gallium/drivers/lima/ir/pp/codegen.h index bf2541f1a8f..dbd0c8f654b 100644 --- a/src/gallium/drivers/lima/ir/pp/codegen.h +++ b/src/gallium/drivers/lima/ir/pp/codegen.h @@ -104,8 +104,8 @@ typedef union __attribute__((__packed__)) { } ppir_codegen_field_varying; typedef enum { - ppir_codegen_sampler_type_2d = 0x00, - ppir_codegen_sampler_type_cube = 0x1F, + ppir_codegen_sampler_type_generic = 0x00, + ppir_codegen_sampler_type_cube = 0x1F, } ppir_codegen_sampler_type; typedef struct __attribute__((__packed__)) { @@ -355,6 +355,6 @@ typedef union __attribute__((__packed__)) { } discard; } ppir_codegen_field_branch; -void ppir_disassemble_instr(uint32_t *instr, unsigned offset); +void ppir_disassemble_instr(uint32_t *instr, unsigned offset, FILE *fp); #endif diff --git a/src/gallium/drivers/lima/ir/pp/disasm.c b/src/gallium/drivers/lima/ir/pp/disasm.c index 50aa4cbb852..81d4016a090 100644 --- a/src/gallium/drivers/lima/ir/pp/disasm.c +++ b/src/gallium/drivers/lima/ir/pp/disasm.c @@ -35,51 +35,51 @@ typedef struct { } asm_op; static void -print_swizzle(uint8_t swizzle) +print_swizzle(uint8_t swizzle, FILE *fp) { if (swizzle == 0xE4) return; - printf("."); + fprintf(fp, "."); for (unsigned i = 0; i < 4; i++, swizzle >>= 2) - printf("%c", "xyzw"[swizzle & 3]); + fprintf(fp, "%c", "xyzw"[swizzle & 3]); } static void -print_mask(uint8_t mask) +print_mask(uint8_t mask, FILE *fp) { if (mask == 0xF) return; - printf("."); - if (mask & 1) printf("x"); - if (mask & 2) printf("y"); - if (mask & 4) printf("z"); - if (mask & 8) printf("w"); + fprintf(fp, "."); + if (mask & 1) fprintf(fp, "x"); + if (mask & 2) fprintf(fp, "y"); + if (mask & 4) fprintf(fp, "z"); + if (mask & 8) fprintf(fp, "w"); } static void -print_reg(ppir_codegen_vec4_reg reg, const char *special) +print_reg(ppir_codegen_vec4_reg reg, const char *special, FILE *fp) { if (special) { - printf("%s", special); + fprintf(fp, "%s", special); } else { switch (reg) { case ppir_codegen_vec4_reg_constant0: - printf("^const0"); + fprintf(fp, "^const0"); break; case ppir_codegen_vec4_reg_constant1: - printf("^const1"); + fprintf(fp, "^const1"); break; case ppir_codegen_vec4_reg_texture: - printf("^texture"); + fprintf(fp, "^texture"); break; case ppir_codegen_vec4_reg_uniform: - printf("^uniform"); + fprintf(fp, "^uniform"); break; default: - printf("$%u", reg); + fprintf(fp, "$%u", reg); break; } } @@ -87,75 +87,75 @@ print_reg(ppir_codegen_vec4_reg reg, const char *special) static void print_vector_source(ppir_codegen_vec4_reg reg, const char *special, - uint8_t swizzle, bool abs, bool neg) + uint8_t swizzle, bool abs, bool neg, FILE *fp) { if (neg) - printf("-"); + fprintf(fp, "-"); if (abs) - printf("abs("); + fprintf(fp, "abs("); - print_reg(reg, special); - print_swizzle(swizzle); + print_reg(reg, special, fp); + print_swizzle(swizzle, fp); if (abs) - printf(")"); + fprintf(fp, ")"); } static void -print_source_scalar(unsigned reg, const char *special, bool abs, bool neg) +print_source_scalar(unsigned reg, const char *special, bool abs, bool neg, FILE *fp) { if (neg) - printf("-"); + fprintf(fp, "-"); if (abs) - printf("abs("); + fprintf(fp, "abs("); - print_reg(reg >> 2, special); + print_reg(reg >> 2, special, fp); if (!special) - printf(".%c", "xyzw"[reg & 3]); + fprintf(fp, ".%c", "xyzw"[reg & 3]); if (abs) - printf(")"); + fprintf(fp, ")"); } static void -print_varying_source(ppir_codegen_field_varying *varying) +print_varying_source(ppir_codegen_field_varying *varying, FILE *fp) { switch (varying->imm.alignment) { case 0: - printf("%u.%c", varying->imm.index >> 2, + fprintf(fp, "%u.%c", varying->imm.index >> 2, "xyzw"[varying->imm.index & 3]); break; case 1: { const char *c[2] = {"xy", "zw"}; - printf("%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]); + fprintf(fp, "%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]); break; } default: - printf("%u", varying->imm.index); + fprintf(fp, "%u", varying->imm.index); break; } if (varying->imm.offset_vector != 15) { unsigned reg = (varying->imm.offset_vector << 2) + varying->imm.offset_scalar; - printf("+"); - print_source_scalar(reg, NULL, false, false); + fprintf(fp, "+"); + print_source_scalar(reg, NULL, false, false, fp); } } static void -print_outmod(ppir_codegen_outmod modifier) +print_outmod(ppir_codegen_outmod modifier, FILE *fp) { switch (modifier) { case ppir_codegen_outmod_clamp_fraction: - printf(".sat"); + fprintf(fp, ".sat"); break; case ppir_codegen_outmod_clamp_positive: - printf(".pos"); + fprintf(fp, ".pos"); break; case ppir_codegen_outmod_round: - printf(".int"); + fprintf(fp, ".int"); break; default: break; @@ -163,190 +163,189 @@ print_outmod(ppir_codegen_outmod modifier) } static void -print_dest_scalar(unsigned reg) +print_dest_scalar(unsigned reg, FILE *fp) { - printf("$%u", reg >> 2); - printf(".%c ", "xyzw"[reg & 3]); + fprintf(fp, "$%u", reg >> 2); + fprintf(fp, ".%c ", "xyzw"[reg & 3]); } static void -print_const(unsigned const_num, uint16_t *val) +print_const(unsigned const_num, uint16_t *val, FILE *fp) { - printf("const%u", const_num); + fprintf(fp, "const%u", const_num); for (unsigned i = 0; i < 4; i++) - printf(" %f", _mesa_half_to_float(val[i])); + fprintf(fp, " %f", _mesa_half_to_float(val[i])); } static void -print_const0(void *code, unsigned offset) +print_const0(void *code, unsigned offset, FILE *fp) { (void) offset; - print_const(0, code); + print_const(0, code, fp); } static void -print_const1(void *code, unsigned offset) +print_const1(void *code, unsigned offset, FILE *fp) { (void) offset; - print_const(1, code); + print_const(1, code, fp); } static void -print_varying(void *code, unsigned offset) +print_varying(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_varying *varying = code; - printf("load"); + fprintf(fp, "load"); bool perspective = varying->imm.source_type < 2 && varying->imm.perspective; if (perspective) { - printf(".perspective"); + fprintf(fp, ".perspective"); switch (varying->imm.perspective) { case 2: - printf(".z"); + fprintf(fp, ".z"); break; case 3: - printf(".w"); + fprintf(fp, ".w"); break; default: - printf(".unknown"); + fprintf(fp, ".unknown"); break; } } - printf(".v "); + fprintf(fp, ".v "); switch (varying->imm.dest) { case ppir_codegen_vec4_reg_discard: - printf("^discard"); + fprintf(fp, "^discard"); break; default: - printf("$%u", varying->imm.dest); + fprintf(fp, "$%u", varying->imm.dest); break; } - print_mask(varying->imm.mask); - printf(" "); + print_mask(varying->imm.mask, fp); + fprintf(fp, " "); switch (varying->imm.source_type) { case 1: print_vector_source(varying->reg.source, NULL, varying->reg.swizzle, - varying->reg.absolute, varying->reg.negate); + varying->reg.absolute, varying->reg.negate, fp); break; case 2: switch (varying->imm.perspective) { case 0: - printf("cube("); - print_varying_source(varying); - printf(")"); + fprintf(fp, "cube("); + print_varying_source(varying, fp); + fprintf(fp, ")"); break; case 1: - printf("cube("); + fprintf(fp, "cube("); print_vector_source(varying->reg.source, NULL, varying->reg.swizzle, - varying->reg.absolute, varying->reg.negate); - printf(")"); + varying->reg.absolute, varying->reg.negate, fp); + fprintf(fp, ")"); break; case 2: - printf("normalize("); + fprintf(fp, "normalize("); print_vector_source(varying->reg.source, NULL, varying->reg.swizzle, - varying->reg.absolute, varying->reg.negate); - printf(")"); + varying->reg.absolute, varying->reg.negate, fp); + fprintf(fp, ")"); break; default: - printf("gl_FragCoord"); + fprintf(fp, "gl_FragCoord"); break; } break; case 3: if (varying->imm.perspective) - printf("gl_FrontFacing"); + fprintf(fp, "gl_FrontFacing"); else - printf("gl_PointCoord"); + fprintf(fp, "gl_PointCoord"); break; default: - print_varying_source(varying); + print_varying_source(varying, fp); break; } } static void -print_sampler(void *code, unsigned offset) +print_sampler(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_sampler *sampler = code; - printf("texld"); + fprintf(fp, "texld"); if (sampler->lod_bias_en) - printf(".b"); + fprintf(fp, ".b"); switch (sampler->type) { - case ppir_codegen_sampler_type_2d: - printf(".2d"); + case ppir_codegen_sampler_type_generic: break; case ppir_codegen_sampler_type_cube: - printf(".cube"); + fprintf(fp, ".cube"); break; default: - printf("_t%u", sampler->type); + fprintf(fp, "_t%u", sampler->type); break; } - printf(" %u", sampler->index); + fprintf(fp, " %u", sampler->index); if (sampler->offset_en) { - printf("+"); - print_source_scalar(sampler->index_offset, NULL, false, false); + fprintf(fp, "+"); + print_source_scalar(sampler->index_offset, NULL, false, false, fp); } if (sampler->lod_bias_en) { - printf(" "); - print_source_scalar(sampler->lod_bias, NULL, false, false); + fprintf(fp, " "); + print_source_scalar(sampler->lod_bias, NULL, false, false, fp); } } static void -print_uniform(void *code, unsigned offset) +print_uniform(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_uniform *uniform = code; - printf("load."); + fprintf(fp, "load."); switch (uniform->source) { case ppir_codegen_uniform_src_uniform: - printf("u"); + fprintf(fp, "u"); break; case ppir_codegen_uniform_src_temporary: - printf("t"); + fprintf(fp, "t"); break; default: - printf(".u%u", uniform->source); + fprintf(fp, ".u%u", uniform->source); break; } int16_t index = uniform->index; switch (uniform->alignment) { case 2: - printf(" %d", index); + fprintf(fp, " %d", index); break; case 1: - printf(" %d.%s", index / 2, (index & 1) ? "zw" : "xy"); + fprintf(fp, " %d.%s", index / 2, (index & 1) ? "zw" : "xy"); break; default: - printf(" %d.%c", index / 4, "xyzw"[index & 3]); + fprintf(fp, " %d.%c", index / 4, "xyzw"[index & 3]); break; } if (uniform->offset_en) { - printf("+"); - print_source_scalar(uniform->offset_reg, NULL, false, false); + fprintf(fp, "+"); + print_source_scalar(uniform->offset_reg, NULL, false, false, fp); } } @@ -377,7 +376,7 @@ static const asm_op vec4_mul_ops[] = { #undef CASE static void -print_vec4_mul(void *code, unsigned offset) +print_vec4_mul(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_vec4_mul *vec4_mul = code; @@ -385,34 +384,34 @@ print_vec4_mul(void *code, unsigned offset) asm_op op = vec4_mul_ops[vec4_mul->op]; if (op.name) - printf("%s", op.name); + fprintf(fp, "%s", op.name); else - printf("op%u", vec4_mul->op); - print_outmod(vec4_mul->dest_modifier); - printf(".v0 "); + fprintf(fp, "op%u", vec4_mul->op); + print_outmod(vec4_mul->dest_modifier, fp); + fprintf(fp, ".v0 "); if (vec4_mul->mask) { - printf("$%u", vec4_mul->dest); - print_mask(vec4_mul->mask); - printf(" "); + fprintf(fp, "$%u", vec4_mul->dest); + print_mask(vec4_mul->mask, fp); + fprintf(fp, " "); } print_vector_source(vec4_mul->arg0_source, NULL, vec4_mul->arg0_swizzle, vec4_mul->arg0_absolute, - vec4_mul->arg0_negate); + vec4_mul->arg0_negate, fp); if (vec4_mul->op < 8 && vec4_mul->op != 0) { - printf("<<%u", vec4_mul->op); + fprintf(fp, "<<%u", vec4_mul->op); } - printf(" "); + fprintf(fp, " "); if (op.srcs > 1) { print_vector_source(vec4_mul->arg1_source, NULL, vec4_mul->arg1_swizzle, vec4_mul->arg1_absolute, - vec4_mul->arg1_negate); + vec4_mul->arg1_negate, fp); } } @@ -444,7 +443,7 @@ static const asm_op vec4_acc_ops[] = { #undef CASE static void -print_vec4_acc(void *code, unsigned offset) +print_vec4_acc(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_vec4_acc *vec4_acc = code; @@ -452,29 +451,29 @@ print_vec4_acc(void *code, unsigned offset) asm_op op = vec4_acc_ops[vec4_acc->op]; if (op.name) - printf("%s", op.name); + fprintf(fp, "%s", op.name); else - printf("op%u", vec4_acc->op); - print_outmod(vec4_acc->dest_modifier); - printf(".v1 "); + fprintf(fp, "op%u", vec4_acc->op); + print_outmod(vec4_acc->dest_modifier, fp); + fprintf(fp, ".v1 "); if (vec4_acc->mask) { - printf("$%u", vec4_acc->dest); - print_mask(vec4_acc->mask); - printf(" "); + fprintf(fp, "$%u", vec4_acc->dest); + print_mask(vec4_acc->mask, fp); + fprintf(fp, " "); } print_vector_source(vec4_acc->arg0_source, vec4_acc->mul_in ? "^v0" : NULL, vec4_acc->arg0_swizzle, vec4_acc->arg0_absolute, - vec4_acc->arg0_negate); + vec4_acc->arg0_negate, fp); if (op.srcs > 1) { - printf(" "); + fprintf(fp, " "); print_vector_source(vec4_acc->arg1_source, NULL, vec4_acc->arg1_swizzle, vec4_acc->arg1_absolute, - vec4_acc->arg1_negate); + vec4_acc->arg1_negate, fp); } } @@ -505,7 +504,7 @@ static const asm_op float_mul_ops[] = { #undef CASE static void -print_float_mul(void *code, unsigned offset) +print_float_mul(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_float_mul *float_mul = code; @@ -513,29 +512,29 @@ print_float_mul(void *code, unsigned offset) asm_op op = float_mul_ops[float_mul->op]; if (op.name) - printf("%s", op.name); + fprintf(fp, "%s", op.name); else - printf("op%u", float_mul->op); - print_outmod(float_mul->dest_modifier); - printf(".s0 "); + fprintf(fp, "op%u", float_mul->op); + print_outmod(float_mul->dest_modifier, fp); + fprintf(fp, ".s0 "); if (float_mul->output_en) - print_dest_scalar(float_mul->dest); + print_dest_scalar(float_mul->dest, fp); print_source_scalar(float_mul->arg0_source, NULL, float_mul->arg0_absolute, - float_mul->arg0_negate); + float_mul->arg0_negate, fp); if (float_mul->op < 8 && float_mul->op != 0) { - printf("<<%u", float_mul->op); + fprintf(fp, "<<%u", float_mul->op); } if (op.srcs > 1) { - printf(" "); + fprintf(fp, " "); print_source_scalar(float_mul->arg1_source, NULL, float_mul->arg1_absolute, - float_mul->arg1_negate); + float_mul->arg1_negate, fp); } } @@ -565,7 +564,7 @@ static const asm_op float_acc_ops[] = { #undef CASE static void -print_float_acc(void *code, unsigned offset) +print_float_acc(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_float_acc *float_acc = code; @@ -573,24 +572,24 @@ print_float_acc(void *code, unsigned offset) asm_op op = float_acc_ops[float_acc->op]; if (op.name) - printf("%s", op.name); + fprintf(fp, "%s", op.name); else - printf("op%u", float_acc->op); - print_outmod(float_acc->dest_modifier); - printf(".s1 "); + fprintf(fp, "op%u", float_acc->op); + print_outmod(float_acc->dest_modifier, fp); + fprintf(fp, ".s1 "); if (float_acc->output_en) - print_dest_scalar(float_acc->dest); + print_dest_scalar(float_acc->dest, fp); print_source_scalar(float_acc->arg0_source, float_acc->mul_in ? "^s0" : NULL, float_acc->arg0_absolute, - float_acc->arg0_negate); + float_acc->arg0_negate, fp); if (op.srcs > 1) { - printf(" "); + fprintf(fp, " "); print_source_scalar(float_acc->arg1_source, NULL, float_acc->arg1_absolute, - float_acc->arg1_negate); + float_acc->arg1_negate, fp); } } @@ -616,7 +615,7 @@ static const asm_op combine_ops[] = { #undef CASE static void -print_combine(void *code, unsigned offset) +print_combine(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_combine *combine = code; @@ -626,105 +625,104 @@ print_combine(void *code, unsigned offset) /* This particular combination can only be valid for scalar * vector * multiplies, and the opcode field is reused for something else. */ - printf("mul"); + fprintf(fp, "mul"); } else { asm_op op = combine_ops[combine->scalar.op]; if (op.name) - printf("%s", op.name); + fprintf(fp, "%s", op.name); else - printf("op%u", combine->scalar.op); + fprintf(fp, "op%u", combine->scalar.op); } if (!combine->scalar.dest_vec) - print_outmod(combine->scalar.dest_modifier); - printf(".s2 "); + print_outmod(combine->scalar.dest_modifier, fp); + fprintf(fp, ".s2 "); if (combine->scalar.dest_vec) { - printf("$%u", combine->vector.dest); - print_mask(combine->vector.mask); + fprintf(fp, "$%u", combine->vector.dest); + print_mask(combine->vector.mask, fp); } else { - print_dest_scalar(combine->scalar.dest); + print_dest_scalar(combine->scalar.dest, fp); } - printf(" "); + fprintf(fp, " "); print_source_scalar(combine->scalar.arg0_src, NULL, combine->scalar.arg0_absolute, - combine->scalar.arg0_negate); - printf(" "); + combine->scalar.arg0_negate, fp); + fprintf(fp, " "); if (combine->scalar.arg1_en) { if (combine->scalar.dest_vec) { print_vector_source(combine->vector.arg1_source, NULL, combine->vector.arg1_swizzle, - false, false); + false, false, fp); } else { print_source_scalar(combine->scalar.arg1_src, NULL, combine->scalar.arg1_absolute, - combine->scalar.arg1_negate); + combine->scalar.arg1_negate, fp); } } } static void -print_temp_write(void *code, unsigned offset) +print_temp_write(void *code, unsigned offset, FILE *fp) { (void) offset; ppir_codegen_field_temp_write *temp_write = code; if (temp_write->fb_read.unknown_0 == 0x7) { if (temp_write->fb_read.source) - printf("fb_color"); + fprintf(fp, "fb_color"); else - printf("fb_depth"); - printf(" $%u", temp_write->fb_read.dest); + fprintf(fp, "fb_depth"); + fprintf(fp, " $%u", temp_write->fb_read.dest); return; } - printf("store.t"); + fprintf(fp, "store.t"); int16_t index = temp_write->temp_write.index; switch (temp_write->temp_write.alignment) { case 2: - printf(" %d", index); + fprintf(fp, " %d", index); break; case 1: - printf(" %d.%s", index / 2, (index & 1) ? "zw" : "xy"); + fprintf(fp, " %d.%s", index / 2, (index & 1) ? "zw" : "xy"); break; default: - printf(" %d.%c", index / 4, "xyzw"[index & 3]); + fprintf(fp, " %d.%c", index / 4, "xyzw"[index & 3]); break; } if (temp_write->temp_write.offset_en) { - printf("+"); + fprintf(fp, "+"); print_source_scalar(temp_write->temp_write.offset_reg, - NULL, false, false); + NULL, false, false, fp); } - printf(" "); + fprintf(fp, " "); if (temp_write->temp_write.alignment) { - print_reg(temp_write->temp_write.source >> 2, NULL); + print_reg(temp_write->temp_write.source >> 2, NULL, fp); } else { - print_source_scalar(temp_write->temp_write.source, NULL, false, false); + print_source_scalar(temp_write->temp_write.source, NULL, false, false, fp); } } static void -print_branch(void *code, unsigned offset) -{ +print_branch(void *code, unsigned offset, FILE *fp) +{ ppir_codegen_field_branch *branch = code; if (branch->discard.word0 == PPIR_CODEGEN_DISCARD_WORD0 && branch->discard.word1 == PPIR_CODEGEN_DISCARD_WORD1 && branch->discard.word2 == PPIR_CODEGEN_DISCARD_WORD2) { - printf("discard"); + fprintf(fp, "discard"); return; } - const char* cond[] = { "nv", "lt", "eq", "le", "gt", "ne", "ge", "" , @@ -734,18 +732,18 @@ print_branch(void *code, unsigned offset) cond_mask |= (branch->branch.cond_lt ? 1 : 0); cond_mask |= (branch->branch.cond_eq ? 2 : 0); cond_mask |= (branch->branch.cond_gt ? 4 : 0); - printf("branch"); + fprintf(fp, "branch"); if (cond_mask != 0x7) { - printf(".%s ", cond[cond_mask]); - print_source_scalar(branch->branch.arg0_source, NULL, false, false); - printf(" "); - print_source_scalar(branch->branch.arg1_source, NULL, false, false); + fprintf(fp, ".%s ", cond[cond_mask]); + print_source_scalar(branch->branch.arg0_source, NULL, false, false, fp); + fprintf(fp, " "); + print_source_scalar(branch->branch.arg1_source, NULL, false, false, fp); } - printf(" %d", branch->branch.target + offset); + fprintf(fp, " %d", branch->branch.target + offset); } -typedef void (*print_field_func)(void *, unsigned); +typedef void (*print_field_func)(void *, unsigned, FILE *); static const print_field_func print_field[ppir_codegen_field_shift_count] = { [ppir_codegen_field_shift_varying] = print_varying, @@ -767,29 +765,29 @@ static const int ppir_codegen_field_size[] = { }; static void -bitcopy(char *src, char *dst, unsigned bits, unsigned src_offset) +bitcopy(unsigned char *src, unsigned char *dst, unsigned bits, unsigned src_offset) { src += src_offset / 8; src_offset %= 8; - for (int b = bits; b > 0; b -= 8, src++, dst++) { - unsigned char out = ((unsigned char) *src) >> src_offset; + for (unsigned b = bits; b > 0; b -= MIN2(b, 8), src++, dst++) { + unsigned char out = *src >> src_offset; if (src_offset > 0 && src_offset + b > 8) - out |= ((unsigned char) *(src + 1)) << (8 - src_offset); - *dst = (char) out; + out |= *(src + 1) << (8 - src_offset); + *dst = out; } } void -ppir_disassemble_instr(uint32_t *instr, unsigned offset) +ppir_disassemble_instr(uint32_t *instr, unsigned offset, FILE *fp) { ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *) instr; - char *instr_code = (char *) (instr + 1); + unsigned char *instr_code = (unsigned char *) (instr + 1); unsigned bit_offset = 0; bool first = true; for (unsigned i = 0; i < ppir_codegen_field_shift_count; i++) { - char code[12]; + unsigned char code[12]; if (!((ctrl->fields >> i) & 1)) continue; @@ -800,18 +798,18 @@ ppir_disassemble_instr(uint32_t *instr, unsigned offset) if (first) first = false; else - printf(", "); + fprintf(fp, ", "); - print_field[i](code, offset); + print_field[i](code, offset, fp); bit_offset += bits; } if (ctrl->sync) - printf(", sync"); + fprintf(fp, ", sync"); if (ctrl->stop) - printf(", stop"); + fprintf(fp, ", stop"); - printf("\n"); + fprintf(fp, "\n"); } diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c index 8e1bc95158d..707055c48ac 100644 --- a/src/gallium/drivers/lima/ir/pp/instr.c +++ b/src/gallium/drivers/lima/ir/pp/instr.c @@ -186,18 +186,17 @@ bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node) uint8_t swizzle[4] = {0}; if (ppir_instr_insert_const(&ic, nc, swizzle)) { + instr->constant[i] = ic; ppir_node *succ = ppir_node_first_succ(node); - ppir_src *src = NULL; for (int s = 0; s < ppir_node_get_src_num(succ); s++) { - src = ppir_node_get_src(succ, s); - if (src->node == node) - break; - } - assert(src->node == node); + ppir_src *src = ppir_node_get_src(succ, s); + assert(src); + if (src->node != node) + continue; - instr->constant[i] = ic; - ppir_update_src_pipeline(ppir_pipeline_reg_const0 + i, src, - &c->dest, swizzle); + ppir_update_src_pipeline(ppir_pipeline_reg_const0 + i, src, + &c->dest, swizzle); + } break; } } @@ -284,7 +283,7 @@ void ppir_instr_print_list(ppir_compiler *comp) list_for_each_entry(ppir_block, block, &comp->block_list, list) { printf("-------block %3d-------\n", block->index); list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { - printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index); + printf("%c%03d: ", instr->stop ? '*' : ' ', instr->index); for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { ppir_node *node = instr->slots[i]; if (node) diff --git a/src/gallium/drivers/lima/ir/pp/liveness.c b/src/gallium/drivers/lima/ir/pp/liveness.c index 1799a53b165..02faa423b87 100644 --- a/src/gallium/drivers/lima/ir/pp/liveness.c +++ b/src/gallium/drivers/lima/ir/pp/liveness.c @@ -121,7 +121,7 @@ ppir_liveness_instr_srcs(ppir_compiler *comp, ppir_instr *instr) /* Update the liveness information of the instruction by removing its * dests from the live_in set. */ static void -ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr) +ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr, ppir_instr *last) { for (int i = PPIR_INSTR_SLOT_NUM-1; i >= 0; i--) { ppir_node *node = instr->slots[i]; @@ -146,9 +146,18 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr) unsigned int index = reg->regalloc_index; bool live = BITSET_TEST(instr->live_set, index); + /* If it's an out reg, it's alive till the end of the block, so add it + * to live_set of the last instruction */ + if (!live && reg->out_reg && (instr != last)) { + BITSET_SET(last->live_set, index); + BITSET_CLEAR(instr->live_set, index); + continue; + } + /* If a register is written but wasn't read in a later instruction, it is - * either dead code or a bug. For now, assign an interference to it to - * ensure it doesn't get assigned a live register and overwrites it. */ + * either an output register in last instruction, dead code or a bug. + * For now, assign an interference to it to ensure it doesn't get assigned + * a live register and overwrites it. */ if (!live) { BITSET_SET(instr->live_internal, index); continue; @@ -224,13 +233,13 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp) } } else { - ppir_instr *next_instr = LIST_ENTRY(ppir_instr, instr->list.next, list); + ppir_instr *next_instr = list_entry(instr->list.next, ppir_instr, list); ppir_liveness_propagate(comp, instr->live_set, next_instr->live_set, instr->live_mask, next_instr->live_mask); } - ppir_liveness_instr_dest(comp, instr); + ppir_liveness_instr_dest(comp, instr, last); ppir_liveness_instr_srcs(comp, instr); cont |= !ppir_liveness_set_equal(comp, diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c index deed1c7f2c9..ecc19b79c2c 100644 --- a/src/gallium/drivers/lima/ir/pp/lower.c +++ b/src/gallium/drivers/lima/ir/pp/lower.c @@ -98,7 +98,8 @@ static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node) static bool ppir_lower_load(ppir_block *block, ppir_node *node) { ppir_dest *dest = ppir_node_get_dest(node); - if (ppir_node_is_root(node) && dest->type == ppir_target_ssa) { + if (ppir_node_is_root(node) && !node->succ_different_block && + dest->type == ppir_target_ssa) { ppir_node_delete(node); return true; } @@ -107,7 +108,8 @@ static bool ppir_lower_load(ppir_block *block, ppir_node *node) * that has load node in source */ if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) && - dest->type != ppir_target_register) { + !node->succ_different_block && + dest->type != ppir_target_register) { ppir_node *succ = ppir_node_first_succ(node); switch (succ->type) { case ppir_node_type_alu: @@ -322,6 +324,98 @@ static bool ppir_lower_sat(ppir_block *block, ppir_node *node) return true; } +static bool ppir_lower_branch_merge_condition(ppir_block *block, ppir_node *node) +{ + /* Check if we can merge a condition with a branch instruction, + * removing the need for a select instruction */ + assert(node->type == ppir_node_type_branch); + + if (!ppir_node_has_single_pred(node)) + return false; + + ppir_node *pred = ppir_node_first_pred(node); + assert(pred); + + if (pred->type != ppir_node_type_alu) + return false; + + switch (pred->op) + { + case ppir_op_lt: + case ppir_op_gt: + case ppir_op_le: + case ppir_op_ge: + case ppir_op_eq: + case ppir_op_ne: + break; + default: + return false; + } + + ppir_dest *dest = ppir_node_get_dest(pred); + if (!ppir_node_has_single_succ(pred) || dest->type != ppir_target_ssa) + return false; + + ppir_alu_node *cond = ppir_node_to_alu(pred); + /* branch can't reference pipeline registers */ + if (cond->src[0].type == ppir_target_pipeline || + cond->src[1].type == ppir_target_pipeline) + return false; + + /* branch can't use flags */ + if (cond->src[0].negate || cond->src[0].absolute || + cond->src[1].negate || cond->src[1].absolute) + return false; + + /* at this point, it can be successfully be replaced. */ + ppir_branch_node *branch = ppir_node_to_branch(node); + switch (pred->op) + { + case ppir_op_le: + branch->cond_gt = true; + break; + case ppir_op_lt: + branch->cond_eq = true; + branch->cond_gt = true; + break; + case ppir_op_ge: + branch->cond_lt = true; + break; + case ppir_op_gt: + branch->cond_eq = true; + branch->cond_lt = true; + break; + case ppir_op_eq: + branch->cond_lt = true; + branch->cond_gt = true; + break; + case ppir_op_ne: + branch->cond_eq = true; + break; + default: + assert(0); + break; + } + + assert(cond->num_src == 2); + + branch->num_src = 2; + branch->src[0] = cond->src[0]; + branch->src[1] = cond->src[1]; + + /* for all nodes before the condition */ + ppir_node_foreach_pred_safe(pred, dep) { + /* insert the branch node as successor */ + ppir_node *p = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(node, p, ppir_dep_src); + } + + ppir_node_delete(pred); + + return true; +} + static bool ppir_lower_branch(ppir_block *block, ppir_node *node) { ppir_branch_node *branch = ppir_node_to_branch(node); @@ -330,6 +424,12 @@ static bool ppir_lower_branch(ppir_block *block, ppir_node *node) if (branch->num_src == 0) return true; + /* Check if we can merge a condition with the branch */ + if (ppir_lower_branch_merge_condition(block, node)) + return true; + + /* If the condition cannot be merged, fall back to a + * comparison against zero */ ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0); if (!zero) @@ -342,11 +442,6 @@ static bool ppir_lower_branch(ppir_block *block, ppir_node *node) zero->dest.ssa.num_components = 1; zero->dest.write_mask = 0x01; - /* For now we're just comparing branch condition with 0, - * in future we should look whether it's possible to move - * comparision node into branch itself and use current - * way as a fallback for complex conditions. - */ ppir_node_target_assign(&branch->src[1], &zero->node); if (branch->negate) diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c index 5d2d2282233..517ec628b41 100644 --- a/src/gallium/drivers/lima/ir/pp/nir.c +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -29,11 +29,12 @@ #include "util/bitscan.h" #include "compiler/nir/nir.h" #include "pipe/p_state.h" +#include "nir_legacy.h" #include "ppir.h" -static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa) +static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa) { ppir_node *node = ppir_node_create(block, op, ssa->index, 0); if (!node) @@ -52,16 +53,16 @@ static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ss } static void *ppir_node_create_reg(ppir_block *block, ppir_op op, - nir_register *reg, unsigned mask) + nir_def *def, unsigned mask) { - ppir_node *node = ppir_node_create(block, op, reg->index, mask); + ppir_node *node = ppir_node_create(block, op, def->index, mask); if (!node) return NULL; ppir_dest *dest = ppir_node_get_dest(node); list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) { - if (r->index == reg->index) { + if (r->index == def->index) { dest->reg = r; break; } @@ -78,22 +79,22 @@ static void *ppir_node_create_reg(ppir_block *block, ppir_op op, } static void *ppir_node_create_dest(ppir_block *block, ppir_op op, - nir_dest *dest, unsigned mask) + nir_legacy_dest *dest, unsigned mask) { unsigned index = -1; if (dest) { if (dest->is_ssa) - return ppir_node_create_ssa(block, op, &dest->ssa); + return ppir_node_create_ssa(block, op, dest->ssa); else - return ppir_node_create_reg(block, op, dest->reg.reg, mask); + return ppir_node_create_reg(block, op, dest->reg.handle, mask); } return ppir_node_create(block, op, index, 0); } static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, - ppir_src *ps, nir_src *ns, unsigned mask) + ppir_src *ps, nir_legacy_src *ns, unsigned mask) { ppir_node *child = NULL; @@ -103,15 +104,15 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, ppir_node_add_dep(node, child, ppir_dep_src); } else { - nir_register *reg = ns->reg.reg; + nir_reg_src *rs = &ns->reg; while (mask) { int swizzle = ps->swizzle[u_bit_scan(&mask)]; - child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle]; + child = comp->var_nodes[(rs->handle->index << 2) + swizzle]; /* Reg is read before it was written, create a dummy node for it */ if (!child) { - child = ppir_node_create_reg(node->block, ppir_op_dummy, reg, + child = ppir_node_create_reg(node->block, ppir_op_dummy, rs->handle, u_bit_consecutive(0, 4)); - comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child; + comp->var_nodes[(rs->handle->index << 2) + swizzle] = child; } /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */ if (child && node != child && child->op != ppir_op_dummy) @@ -119,13 +120,11 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, } } + assert(child); ppir_node_target_assign(ps, child); } static int nir_to_ppir_opcodes[nir_num_opcodes] = { - /* not supported */ - [0 ... nir_last_opcode] = -1, - [nir_op_mov] = ppir_op_mov, [nir_op_fmul] = ppir_op_mul, [nir_op_fabs] = ppir_op_abs, @@ -160,21 +159,38 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = { static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) { nir_alu_instr *instr = nir_instr_as_alu(ni); + nir_def *def = &instr->def; int op = nir_to_ppir_opcodes[instr->op]; - if (op < 0) { + if (op == ppir_op_unsupported) { ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); return false; } + nir_legacy_alu_dest legacy_dest = nir_legacy_chase_alu_dest(def); + + /* Don't try to translate folded fsat since their source won't be valid */ + if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr)) + return true; - ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest, - instr->dest.write_mask); + /* Skip folded fabs/fneg since we do not have dead code elimination */ + if ((instr->op == nir_op_fabs || instr->op == nir_op_fneg) && + nir_legacy_float_mod_folds(instr)) { + /* Add parent node as a the folded def node to keep + * the dependency chain */ + nir_alu_src *ns = &instr->src[0]; + ppir_node *parent = block->comp->var_nodes[ns->src.ssa->index]; + assert(parent); + block->comp->var_nodes[def->index] = parent; + return true; + } + + ppir_alu_node *node = ppir_node_create_dest(block, op, &legacy_dest.dest, + legacy_dest.write_mask); if (!node) return false; ppir_dest *pd = &node->dest; - nir_alu_dest *nd = &instr->dest; - if (nd->saturate) + if (legacy_dest.fsat) pd->modifier = ppir_outmod_clamp_fraction; unsigned src_mask; @@ -194,13 +210,13 @@ static bool ppir_emit_alu(ppir_block *block, nir_instr *ni) node->num_src = num_child; for (int i = 0; i < num_child; i++) { - nir_alu_src *ns = instr->src + i; + nir_legacy_alu_src ns = nir_legacy_chase_alu_src(instr->src + i, true); ppir_src *ps = node->src + i; - memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle)); - ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask); + memcpy(ps->swizzle, ns.swizzle, sizeof(ps->swizzle)); + ppir_node_add_src(block->comp, &node->node, ps, &ns.src, src_mask); - ps->absolute = ns->abs; - ps->negate = ns->negate; + ps->absolute = ns.fabs; + ps->negate = ns.fneg; } list_addtail(&node->node.list, &block->node_list); @@ -244,8 +260,9 @@ static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni) branch = ppir_node_to_branch(node); /* second src and condition will be updated during lowering */ + nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[0]); ppir_node_add_src(block->comp, node, &branch->src[0], - &instr->src[0], u_bit_consecutive(0, instr->num_components)); + &legacy_src, u_bit_consecutive(0, instr->num_components)); branch->num_src = 1; branch->target = comp->discard_block; @@ -268,11 +285,22 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) ppir_alu_node *alu_node; switch (instr->intrinsic) { - case nir_intrinsic_load_input: - if (!instr->dest.is_ssa) - mask = u_bit_consecutive(0, instr->num_components); + case nir_intrinsic_decl_reg: + case nir_intrinsic_store_reg: + /* Nothing to do for these */ + return true; + + case nir_intrinsic_load_reg: { + nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def); + lnode = ppir_node_create_dest(block, ppir_op_dummy, &legacy_dest, mask); + return true; + } + + case nir_intrinsic_load_input: { + mask = u_bit_consecutive(0, instr->num_components); - lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask); + nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def); + lnode = ppir_node_create_dest(block, ppir_op_load_varying, &legacy_dest, mask); if (!lnode) return false; @@ -282,16 +310,17 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4); else { lnode->num_src = 1; - ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1); + nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src); + ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1); } list_addtail(&lnode->node.list, &block->node_list); return true; + } case nir_intrinsic_load_frag_coord: case nir_intrinsic_load_point_coord: - case nir_intrinsic_load_front_face: - if (!instr->dest.is_ssa) - mask = u_bit_consecutive(0, instr->num_components); + case nir_intrinsic_load_front_face: { + mask = u_bit_consecutive(0, instr->num_components); ppir_op op; switch (instr->intrinsic) { @@ -309,19 +338,21 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) break; } - lnode = ppir_node_create_dest(block, op, &instr->dest, mask); + nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def); + lnode = ppir_node_create_dest(block, op, &legacy_dest, mask); if (!lnode) return false; lnode->num_components = instr->num_components; list_addtail(&lnode->node.list, &block->node_list); return true; + } - case nir_intrinsic_load_uniform: - if (!instr->dest.is_ssa) - mask = u_bit_consecutive(0, instr->num_components); + case nir_intrinsic_load_uniform: { + mask = u_bit_consecutive(0, instr->num_components); - lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask); + nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def); + lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &legacy_dest, mask); if (!lnode) return false; @@ -331,11 +362,13 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) lnode->index += (uint32_t)nir_src_as_float(instr->src[0]); else { lnode->num_src = 1; - ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1); + nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src); + ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1); } list_addtail(&lnode->node.list, &block->node_list); return true; + } case nir_intrinsic_store_output: { /* In simple cases where the store_output is ssa, that register @@ -345,16 +378,36 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) * back to inserting a mov at the end. * If the source node will only be able to output to pipeline * registers, fall back to the mov as well. */ - if (!block->comp->uses_discard && instr->src->is_ssa) { + assert(nir_src_is_const(instr->src[1]) && + "lima doesn't support indirect outputs"); + + nir_io_semantics io = nir_intrinsic_io_semantics(instr); + unsigned offset = nir_src_as_uint(instr->src[1]); + unsigned slot = io.location + offset; + ppir_output_type out_type = ppir_nir_output_to_ppir(slot, + block->comp->dual_source_blend ? io.dual_source_blend_index : 0); + if (out_type == ppir_output_invalid) { + ppir_debug("Unsupported output type: %d\n", slot); + return false; + } + + if (!block->comp->uses_discard) { node = block->comp->var_nodes[instr->src->ssa->index]; + assert(node); switch (node->op) { case ppir_op_load_uniform: case ppir_op_load_texture: + case ppir_op_dummy: case ppir_op_const: break; - default: - node->is_end = 1; + default: { + ppir_dest *dest = ppir_node_get_dest(node); + dest->ssa.out_type = out_type; + dest->ssa.num_components = 4; + dest->write_mask = u_bit_consecutive(0, 4); + node->is_out = 1; return true; + } } } @@ -364,19 +417,21 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) ppir_dest *dest = ppir_node_get_dest(&alu_node->node); dest->type = ppir_target_ssa; - dest->ssa.num_components = instr->num_components; + dest->ssa.num_components = 4; dest->ssa.index = 0; - dest->write_mask = u_bit_consecutive(0, instr->num_components); + dest->write_mask = u_bit_consecutive(0, 4); + dest->ssa.out_type = out_type; alu_node->num_src = 1; for (int i = 0; i < instr->num_components; i++) alu_node->src[0].swizzle[i] = i; - ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src, - u_bit_consecutive(0, instr->num_components)); + nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src); + ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, &legacy_src, + u_bit_consecutive(0, 4)); - alu_node->node.is_end = 1; + alu_node->node.is_out = 1; list_addtail(&alu_node->node.list, &block->node_list); return true; @@ -418,7 +473,7 @@ static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni) static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni) { - nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni); + nir_undef_instr *undef = nir_instr_as_undef(ni); ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def); if (!node) return false; @@ -447,7 +502,9 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) } switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: case GLSL_SAMPLER_DIM_RECT: case GLSL_SAMPLER_DIM_EXTERNAL: @@ -460,10 +517,10 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) /* emit ld_tex node */ unsigned mask = 0; - if (!instr->dest.is_ssa) - mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr)); + mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr)); - node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask); + nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def); + node = ppir_node_create_dest(block, ppir_op_load_texture, &legacy_dest, mask); if (!node) return false; @@ -473,23 +530,28 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) for (int i = 0; i < instr->coord_components; i++) node->src[0].swizzle[i] = i; + bool perspective = false; + for (int i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { + case nir_tex_src_backend1: + perspective = true; + FALLTHROUGH; case nir_tex_src_coord: { nir_src *ns = &instr->src[i].src; - if (ns->is_ssa) { - ppir_node *child = block->comp->var_nodes[ns->ssa->index]; - if (child->op == ppir_op_load_varying) { - /* If the successor is load_texture, promote it to load_coords */ - nir_tex_src *nts = (nir_tex_src *)ns; - if (nts->src_type == nir_tex_src_coord) - child->op = ppir_op_load_coords; - } + ppir_node *child = block->comp->var_nodes[ns->ssa->index]; + if (child->op == ppir_op_load_varying) { + /* If the successor is load_texture, promote it to load_coords */ + nir_tex_src *nts = (nir_tex_src *)ns; + if (nts->src_type == nir_tex_src_coord || + nts->src_type == nir_tex_src_backend1) + child->op = ppir_op_load_coords; } /* src[0] is not used by the ld_tex instruction but ensures * correct scheduling due to the pipeline dependency */ - ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src, + nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src); + ppir_node_add_src(block->comp, &node->node, &node->src[0], &legacy_src, u_bit_consecutive(0, instr->coord_components)); node->num_src++; break; @@ -498,7 +560,8 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) case nir_tex_src_lod: node->lod_bias_en = true; node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod); - ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1); + nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src); + ppir_node_add_src(block->comp, &node->node, &node->src[1], &legacy_src, 1); node->num_src++; break; default: @@ -526,13 +589,10 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) load->src = node->src[0]; load->num_src = 1; - if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE) - load->num_components = 3; - else - load->num_components = 2; + load->num_components = instr->coord_components; ppir_debug("%s create load_coords node %d for %d\n", - __FUNCTION__, load->index, node->node.index); + __func__, load->index, node->node.index); ppir_node_foreach_pred_safe((&node->node), dep) { ppir_node *pred = dep->pred; @@ -543,6 +603,15 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni) } assert(load); + + if (perspective) { + if (instr->coord_components == 3) + load->perspective = ppir_perspective_z; + else + load->perspective = ppir_perspective_w; + } + + load->sampler_dim = instr->sampler_dim; node->src[0].type = load->dest.type = ppir_target_pipeline; node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard; @@ -598,7 +667,7 @@ static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = [nir_instr_type_alu] = ppir_emit_alu, [nir_instr_type_intrinsic] = ppir_emit_intrinsic, [nir_instr_type_load_const] = ppir_emit_load_const, - [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef, + [nir_instr_type_undef] = ppir_emit_ssa_undef, [nir_instr_type_tex] = ppir_emit_tex, [nir_instr_type_jump] = ppir_emit_jump, }; @@ -650,8 +719,9 @@ static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt) if (!node) return false; else_branch = ppir_node_to_branch(node); + nir_legacy_src legacy_src = nir_legacy_chase_src(&if_stmt->condition); ppir_node_add_src(block->comp, node, &else_branch->src[0], - &if_stmt->condition, 1); + &legacy_src, 1); else_branch->num_src = 1; /* Negate condition to minimize branching. We're generating following: * current_block: { ...; if (!statement) branch else_block; } @@ -704,6 +774,7 @@ static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt) static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop) { + assert(!nir_loop_has_continue_construct(nloop)); ppir_block *save_loop_cont_block = comp->loop_cont_block; ppir_block *block; ppir_branch_node *loop_branch; @@ -769,10 +840,10 @@ static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list) return true; } -static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) +static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa) { ppir_compiler *comp = rzalloc_size( - prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *)); + prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *)); if (!comp) return NULL; @@ -782,8 +853,8 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne comp->blocks = _mesa_hash_table_u64_create(prog); comp->var_nodes = (ppir_node **)(comp + 1); - comp->reg_base = num_ssa; comp->prog = prog; + return comp; } @@ -819,7 +890,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp) if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) { ppir_node_add_dep(prev_node, node, ppir_dep_sequence); } - if (node->is_end || + if (node->is_out || node->op == ppir_op_discard || node->op == ppir_op_store_temp || node->op == ppir_op_branch) { @@ -830,7 +901,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp) } static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp, - struct pipe_debug_callback *debug) + struct util_debug_callback *debug) { const struct shader_info *info = &nir->info; char *shaderdb; @@ -846,7 +917,7 @@ static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp, if (lima_debug & LIMA_DEBUG_SHADERDB) fprintf(stderr, "SHADER-DB: %s\n", shaderdb); - pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb); + util_debug_message(debug, SHADER_INFO, "%s", shaderdb); free(shaderdb); } @@ -876,22 +947,20 @@ static void ppir_add_write_after_read_deps(ppir_compiler *comp) bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir, struct ra_regs *ra, - struct pipe_debug_callback *debug) + struct util_debug_callback *debug) { nir_function_impl *func = nir_shader_get_entrypoint(nir); - ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); + ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc); if (!comp) return false; comp->ra = ra; comp->uses_discard = nir->info.fs.uses_discard; + comp->dual_source_blend = nir->info.fs.color_is_dual_source; /* 1st pass: create ppir blocks */ - nir_foreach_function(function, nir) { - if (!function->impl) - continue; - - nir_foreach_block(nblock, function->impl) { + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(nblock, impl) { ppir_block *block = ppir_block_create(comp); if (!block) return false; @@ -901,11 +970,8 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n } /* 2nd pass: populate successors */ - nir_foreach_function(function, nir) { - if (!function->impl) - continue; - - nir_foreach_block(nblock, function->impl) { + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(nblock, impl) { ppir_block *block = ppir_get_block(comp, nblock); assert(block); @@ -916,26 +982,19 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n } } - /* Validate outputs, we support only gl_FragColor */ - nir_foreach_shader_out_variable(var, nir) { - switch (var->data.location) { - case FRAG_RESULT_COLOR: - case FRAG_RESULT_DATA0: - break; - default: - ppir_error("unsupported output type\n"); - goto err_out0; - break; - } - } + comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num); + + /* -1 means reg is not written by the shader */ + for (int i = 0; i < ppir_output_num; i++) + comp->out_type_to_reg[i] = -1; - foreach_list_typed(nir_register, reg, node, &func->registers) { + nir_foreach_reg_decl(decl, func) { ppir_reg *r = rzalloc(comp, ppir_reg); if (!r) return false; - r->index = reg->index; - r->num_components = reg->num_components; + r->index = decl->def.index; + r->num_components = nir_intrinsic_num_components(decl); r->is_head = false; list_addtail(&r->list, &comp->reg_list); comp->reg_num++; diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c index 99d025e2c05..e22a06ce5ee 100644 --- a/src/gallium/drivers/lima/ir/pp/node.c +++ b/src/gallium/drivers/lima/ir/pp/node.c @@ -29,6 +29,9 @@ #include "ppir.h" const ppir_op_info ppir_op_infos[] = { + [ppir_op_unsupported] = { + .name = "unsupported", + }, [ppir_op_mov] = { .name = "mov", .slots = (int []) { @@ -330,12 +333,14 @@ const ppir_op_info ppir_op_infos[] = { .name = "undef", .type = ppir_node_type_alu, .slots = (int []) { + PPIR_INSTR_SLOT_END }, }, [ppir_op_dummy] = { .name = "dummy", .type = ppir_node_type_alu, .slots = (int []) { + PPIR_INSTR_SLOT_END }, }, }; @@ -366,7 +371,7 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) if (mask) { /* reg has 4 slots for each component write node */ while (mask) - comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node; + comp->var_nodes[(index << 2) + u_bit_scan(&mask)] = node; snprintf(node->name, sizeof(node->name), "reg%d", index); } else { comp->var_nodes[index] = node; @@ -618,9 +623,9 @@ static ppir_node *ppir_node_insert_mov_local(ppir_node *node) ppir_node_add_dep(move, node, ppir_dep_src); list_addtail(&move->list, &node->list); - if (node->is_end) { - node->is_end = false; - move->is_end = true; + if (node->is_out) { + node->is_out = false; + move->is_out = true; } return move; diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c index a54be74ccfc..ffe8c5af89d 100644 --- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -58,6 +58,18 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node) return ppir_instr_insert_node(succ->instr, node); } + if (ppir_node_has_single_succ(node) && + ppir_node_has_single_pred(ppir_node_first_succ(node)) && + (ppir_node_first_succ(node)->type == ppir_node_type_branch)) { + + assert(ppir_node_has_single_succ(node)); + ppir_node *succ = ppir_node_first_succ(node); + assert(succ); + assert(succ->instr); + + return ppir_instr_insert_node(succ->instr, node); + } + switch (node->type) { case ppir_node_type_load: break; @@ -88,7 +100,8 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node) * by using pipeline reg ^vmul/^fmul */ ppir_alu_node *alu = ppir_node_to_alu(node); if (alu->dest.type == ppir_target_ssa && - ppir_node_has_single_succ(node)) { + ppir_node_has_single_succ(node) && + ppir_node_has_single_src_succ(node)) { ppir_node *succ = ppir_node_first_succ(node); if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_VEC_ADD) { node->instr_pos = PPIR_INSTR_SLOT_ALU_VEC_MUL; @@ -202,7 +215,7 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node) case ppir_node_type_discard: if (!create_new_instr(block, node)) return false; - node->instr->is_end = true; + block->stop = true; break; case ppir_node_type_branch: if (!create_new_instr(block, node)) @@ -275,8 +288,13 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root) if (!ppir_do_one_node_to_instr(block, node)) return false; - if (node->is_end) - node->instr->is_end = true; + /* The node writes output register. We can't stop at this exact + * instruction because there may be another node that writes another + * output, so set stop flag for the block. We will set stop flag on + * the last instruction of the block during codegen + */ + if (node->is_out) + block->stop = true; ppir_node_foreach_pred(node, dep) { ppir_node *pred = dep->pred; diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h index 480fca9e689..f9191a1c5d3 100644 --- a/src/gallium/drivers/lima/ir/pp/ppir.h +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -32,6 +32,7 @@ #include "ir/lima_ir.h" typedef enum { + ppir_op_unsupported = 0, ppir_op_mov, ppir_op_abs, ppir_op_neg, @@ -161,7 +162,7 @@ typedef struct ppir_node { struct ppir_instr *instr; int instr_pos; struct ppir_block *block; - bool is_end; + bool is_out; bool succ_different_block; /* for scheduler */ @@ -179,9 +180,45 @@ typedef enum { ppir_pipeline_reg_discard, /* varying load */ } ppir_pipeline; +typedef enum { + ppir_output_color0, + ppir_output_color1, + ppir_output_depth, + ppir_output_num, + ppir_output_invalid = -1, +} ppir_output_type; + +static inline const char *ppir_output_type_to_str(ppir_output_type type) +{ + switch (type) { + case ppir_output_color0: + return "OUTPUT_COLOR0"; + case ppir_output_color1: + return "OUTPUT_COLOR1"; + case ppir_output_depth: + return "OUTPUT_DEPTH"; + default: + return "INVALID"; + } +} + +static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res, int dual_src_index) +{ + switch (res) { + case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: + return ppir_output_color0 + dual_src_index; + case FRAG_RESULT_DEPTH: + return ppir_output_depth; + default: + return ppir_output_invalid; + } +} + typedef struct ppir_reg { struct list_head list; int index; + ppir_output_type out_type; int regalloc_index; int num_components; @@ -191,6 +228,7 @@ typedef struct ppir_reg { bool is_head; bool spilled; bool undef; + bool out_reg; } ppir_reg; typedef enum { @@ -252,6 +290,12 @@ typedef struct { ppir_dest dest; } ppir_const_node; +typedef enum { + ppir_perspective_none = 0, + ppir_perspective_z, + ppir_perspective_w, +} ppir_perspective; + typedef struct { ppir_node node; int index; @@ -259,6 +303,8 @@ typedef struct { ppir_dest dest; ppir_src src; int num_src; + ppir_perspective perspective; + int sampler_dim; } ppir_load_node; typedef struct { @@ -308,7 +354,7 @@ typedef struct ppir_instr { ppir_node *slots[PPIR_INSTR_SLOT_NUM]; ppir_const constant[2]; - bool is_end; + bool stop; /* for scheduler */ struct list_head succ_list; @@ -332,6 +378,7 @@ typedef struct ppir_block { struct list_head list; struct list_head node_list; struct list_head instr_list; + bool stop; struct ppir_block *successors[2]; @@ -362,17 +409,18 @@ typedef struct ppir_compiler { struct hash_table_u64 *blocks; int cur_index; int cur_instr_index; + int *out_type_to_reg; struct list_head reg_list; int reg_num; /* array for searching ssa/reg node */ ppir_node **var_nodes; - unsigned reg_base; struct ra_regs *ra; struct lima_fs_compiled_shader *prog; bool uses_discard; + bool dual_source_blend; /* for scheduler */ int sched_instr_base; @@ -455,6 +503,7 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node) static inline ppir_dest *ppir_node_get_dest(ppir_node *node) { + assert(node); switch (node->type) { case ppir_node_type_alu: return &ppir_node_to_alu(node)->dest; @@ -471,6 +520,7 @@ static inline ppir_dest *ppir_node_get_dest(ppir_node *node) static inline int ppir_node_get_src_num(ppir_node *node) { + assert(node); switch (node->type) { case ppir_node_type_alu: return ppir_node_to_alu(node)->num_src; diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c index 3ea136b5660..e80d468313b 100644 --- a/src/gallium/drivers/lima/ir/pp/regalloc.c +++ b/src/gallium/drivers/lima/ir/pp/regalloc.c @@ -82,9 +82,6 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp) { list_for_each_entry(ppir_block, block, &comp->block_list, list) { list_for_each_entry(ppir_node, node, &block->node_list, list) { - if (node->is_end) - continue; - if (!node->instr || node->op == ppir_op_const) continue; @@ -94,6 +91,8 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp) if (dest->type == ppir_target_ssa) { reg = &dest->ssa; + if (node->is_out) + reg->out_reg = true; list_addtail(®->list, &comp->reg_list); comp->reg_num++; } @@ -133,6 +132,14 @@ static void ppir_regalloc_print_result(ppir_compiler *comp) } } printf("--------------------------\n"); + + printf("======ppir output regs======\n"); + for (int i = 0; i < ppir_output_num; i++) { + if (comp->out_type_to_reg[i] != -1) + printf("%s: $%d\n", ppir_output_type_to_str(i), + (int)comp->out_type_to_reg[i]); + } + printf("--------------------------\n"); } static bool create_new_instr_after(ppir_block *block, ppir_instr *ref, @@ -411,6 +418,7 @@ static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp, * but not too much as to offset the num_components base cost. */ const float slot_scale = 1.1f; + memset(spill_costs, 0, sizeof(spill_costs[0]) * comp->reg_num); list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { if (reg->spilled) { /* not considered for spilling */ @@ -578,6 +586,11 @@ static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled) n = 0; list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { reg->index = ra_get_node_reg(g, n++); + if (reg->out_reg) { + /* We need actual reg number, we don't have swizzle for output regs */ + assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle"); + comp->out_type_to_reg[reg->out_type] = reg->index / 4; + } } ralloc_free(g); @@ -604,8 +617,12 @@ bool ppir_regalloc_prog(ppir_compiler *comp) ppir_regalloc_update_reglist_ssa(comp); /* No registers? Probably shader consists of discard instruction */ - if (list_is_empty(&comp->reg_list)) + if (list_is_empty(&comp->reg_list)) { + comp->prog->state.frag_color0_reg = 0; + comp->prog->state.frag_color1_reg = -1; + comp->prog->state.frag_depth_reg = -1; return true; + } /* this will most likely succeed in the first * try, except for very complicated shaders */ @@ -613,5 +630,12 @@ bool ppir_regalloc_prog(ppir_compiler *comp) if (!spilled) return false; + comp->prog->state.frag_color0_reg = + comp->out_type_to_reg[ppir_output_color0]; + comp->prog->state.frag_color1_reg = + comp->out_type_to_reg[ppir_output_color1]; + comp->prog->state.frag_depth_reg = + comp->out_type_to_reg[ppir_output_depth]; + return true; } diff --git a/src/gallium/drivers/lima/ir/pp/scheduler.c b/src/gallium/drivers/lima/ir/pp/scheduler.c index 5e7a17c9bb9..b81e2d61936 100644 --- a/src/gallium/drivers/lima/ir/pp/scheduler.c +++ b/src/gallium/drivers/lima/ir/pp/scheduler.c @@ -26,6 +26,10 @@ #include "ppir.h" +static int cmp_int(const void *a, const void *b) +{ + return (*(int*)a - *(int*)b); +} static void ppir_schedule_calc_sched_info(ppir_instr *instr) { @@ -62,15 +66,7 @@ static void ppir_schedule_calc_sched_info(ppir_instr *instr) } /* sort */ - for (i = 0; i < n - 1; i++) { - for (int j = 0; j < n - i - 1; j++) { - if (reg[j] > reg[j + 1]) { - int tmp = reg[j + 1]; - reg[j + 1] = reg[j]; - reg[j] = tmp; - } - } - } + qsort(reg, n, sizeof(reg[0]), cmp_int); for (i = 0; i < n; i++) { int pressure = reg[i] + n - (i + 1); diff --git a/src/gallium/drivers/lima/lima_blit.c b/src/gallium/drivers/lima/lima_blit.c new file mode 100644 index 00000000000..0da8ee71727 --- /dev/null +++ b/src/gallium/drivers/lima/lima_blit.c @@ -0,0 +1,319 @@ +/* + * Copyright (C) 2022 Lima Project + * + * SPDX-License-Identifier: MIT + * + */ + +#include "drm-uapi/lima_drm.h" + +#include "util/u_math.h" +#include "util/format/u_format.h" +#include "util/u_surface.h" +#include "util/u_inlines.h" +#include "util/hash_table.h" + +#include "lima_context.h" +#include "lima_gpu.h" +#include "lima_resource.h" +#include "lima_texture.h" +#include "lima_format.h" +#include "lima_job.h" +#include "lima_screen.h" +#include "lima_bo.h" +#include "lima_parser.h" +#include "lima_util.h" +#include "lima_blit.h" + +void +lima_pack_blit_cmd(struct lima_job *job, + struct util_dynarray *cmd_array, + struct pipe_surface *psurf, + const struct pipe_box *src, + const struct pipe_box *dst, + unsigned filter, + bool scissor, + unsigned sample_mask, + unsigned mrt_idx) +{ + #define lima_blit_render_state_offset 0x0000 + #define lima_blit_gl_pos_offset 0x0040 + #define lima_blit_varying_offset 0x0080 + #define lima_blit_tex_desc_offset 0x00c0 + #define lima_blit_tex_array_offset 0x0100 + #define lima_blit_buffer_size 0x0140 + + struct lima_context *ctx = job->ctx; + struct lima_surface *surf = lima_surface(psurf); + int level = psurf->u.tex.level; + unsigned first_layer = psurf->u.tex.first_layer; + float fb_width = dst->width, fb_height = dst->height; + + uint32_t va; + void *cpu = lima_job_create_stream_bo( + job, LIMA_PIPE_PP, lima_blit_buffer_size, &va); + + struct lima_screen *screen = lima_screen(ctx->base.screen); + + uint32_t reload_shader_first_instr_size = + ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f; + uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset; + + struct lima_render_state reload_render_state = { + .alpha_blend = 0xf03b1ad2, + .depth_test = 0x0000000e, + .depth_range = 0xffff0000, + .stencil_front = 0x00000007, + .stencil_back = 0x00000007, + .multi_sample = 0x00000007, + .shader_address = reload_shader_va | reload_shader_first_instr_size, + .varying_types = 0x00000001, + .textures_address = va + lima_blit_tex_array_offset, + .aux0 = 0x00004021, + .varyings_address = va + lima_blit_varying_offset, + }; + + reload_render_state.multi_sample |= (sample_mask << 12); + + if (job->key.cbuf) { + fb_width = job->key.cbuf->width; + fb_height = job->key.cbuf->height; + } else { + fb_width = job->key.zsbuf->width; + fb_height = job->key.zsbuf->height; + } + + if (util_format_is_depth_or_stencil(psurf->format)) { + reload_render_state.alpha_blend &= 0x0fffffff; + if (psurf->format != PIPE_FORMAT_Z16_UNORM) + reload_render_state.depth_test |= 0x400; + if (surf->reload & PIPE_CLEAR_DEPTH) + reload_render_state.depth_test |= 0x801; + if (surf->reload & PIPE_CLEAR_STENCIL) { + reload_render_state.depth_test |= 0x1000; + reload_render_state.stencil_front = 0x0000024f; + reload_render_state.stencil_back = 0x0000024f; + reload_render_state.stencil_test = 0x0000ffff; + } + } + + memcpy(cpu + lima_blit_render_state_offset, &reload_render_state, + sizeof(reload_render_state)); + + lima_tex_desc *td = cpu + lima_blit_tex_desc_offset; + memset(td, 0, lima_min_tex_desc_size); + lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, + first_layer, mrt_idx); + td->format = lima_format_get_texel_reload(psurf->format); + td->unnorm_coords = 1; + td->sampler_dim = LIMA_SAMPLER_DIM_2D; + td->min_img_filter_nearest = 1; + td->mag_img_filter_nearest = 1; + td->wrap_s = LIMA_TEX_WRAP_CLAMP_TO_EDGE; + td->wrap_t = LIMA_TEX_WRAP_CLAMP_TO_EDGE; + td->wrap_r = LIMA_TEX_WRAP_CLAMP_TO_EDGE; + + if (filter != PIPE_TEX_FILTER_NEAREST) { + td->min_img_filter_nearest = 0; + td->mag_img_filter_nearest = 0; + } + + uint32_t *ta = cpu + lima_blit_tex_array_offset; + ta[0] = va + lima_blit_tex_desc_offset; + + float reload_gl_pos[] = { + dst->x + dst->width, dst->y, 0, 1, + dst->x, dst->y, 0, 1, + dst->x, dst->y + dst->height, 0, 1, + }; + memcpy(cpu + lima_blit_gl_pos_offset, reload_gl_pos, + sizeof(reload_gl_pos)); + + float reload_varying[] = { + src->x + src->width, src->y, + src->x, src->y, + src->x, src->y + src->height, + 0, 0, /* unused */ + }; + memcpy(cpu + lima_blit_varying_offset, reload_varying, + sizeof(reload_varying)); + + PLBU_CMD_BEGIN(cmd_array, scissor ? 22 : 20); + + PLBU_CMD_VIEWPORT_LEFT(0); + PLBU_CMD_VIEWPORT_RIGHT(fui(fb_width)); + PLBU_CMD_VIEWPORT_BOTTOM(0); + PLBU_CMD_VIEWPORT_TOP(fui(fb_height)); + + PLBU_CMD_RSW_VERTEX_ARRAY( + va + lima_blit_render_state_offset, + va + lima_blit_gl_pos_offset); + + + if (scissor) { + int minx = MIN2(dst->x, dst->x + dst->width); + int maxx = MAX2(dst->x, dst->x + dst->width); + int miny = MIN2(dst->y, dst->y + dst->height); + int maxy = MAX2(dst->y, dst->y + dst->height); + + PLBU_CMD_SCISSORS(minx, maxx, miny, maxy); + lima_damage_rect_union(&job->damage_rect, minx, maxx, miny, maxy); + } + + PLBU_CMD_UNKNOWN2(); + PLBU_CMD_UNKNOWN1(); + + PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset); + PLBU_CMD_INDEXED_DEST(va + lima_blit_gl_pos_offset); + PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3); + + PLBU_CMD_END(); + + lima_dump_command_stream_print(job->dump, cpu, lima_blit_buffer_size, + false, "blit plbu cmd at va %x\n", va); +} + +static struct pipe_surface * +lima_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return pctx->create_surface(pctx, prsc, &tmpl); +} + +bool +lima_do_blit(struct pipe_context *pctx, + const struct pipe_blit_info *info) +{ + struct lima_context *ctx = lima_context(pctx); + unsigned reload_flags = PIPE_CLEAR_COLOR0; + uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }; + + if (lima_debug & LIMA_DEBUG_NO_BLIT) + return false; + + /* Blitting of swizzled formats (R and RG) isn't implemented yet */ + if (memcmp(identity, + lima_format_get_texel_swizzle(info->src.resource->format), + sizeof(identity))) + return false; + + if (memcmp(identity, + lima_format_get_texel_swizzle(info->dst.resource->format), + sizeof(identity))) + return false; + + if (util_format_is_depth_or_stencil(info->src.resource->format)) { + const struct util_format_description *desc = + util_format_description(info->src.resource->format); + reload_flags = 0; + if (util_format_has_depth(desc)) + reload_flags |= PIPE_CLEAR_DEPTH; + if (util_format_has_stencil(desc)) + reload_flags |= PIPE_CLEAR_STENCIL; + } + + if (!lima_format_pixel_supported(info->dst.resource->format)) + return false; + + if (!lima_format_texel_supported(info->src.resource->format)) + return false; + + if (info->dst.resource->target != PIPE_TEXTURE_2D || + info->src.resource->target != PIPE_TEXTURE_2D) + return false; + + if (info->dst.box.x < 0 || info->dst.box.y < 0 || + info->src.box.x < 0 || info->src.box.y < 0) + return false; + + if (info->src.box.depth != 1 || + info->dst.box.depth != 1) + return false; + + /* Scissored blit isn't implemented yet */ + if (info->scissor_enable) + return false; + + if ((reload_flags & PIPE_CLEAR_COLOR) && !(info->mask & PIPE_MASK_RGBA)) + return false; + + if ((reload_flags & PIPE_CLEAR_DEPTH) && !(info->mask & PIPE_MASK_Z)) + return false; + + if ((reload_flags & PIPE_CLEAR_STENCIL) && !(info->mask & PIPE_MASK_S)) + return false; + + struct pipe_surface *dst_surf = + lima_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct lima_surface *lima_dst_surf = lima_surface(dst_surf); + + struct pipe_surface *src_surf = + lima_get_blit_surface(pctx, info->src.resource, info->src.level); + + struct lima_job *job; + + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + job = lima_job_get_with_fb(ctx, NULL, dst_surf); + else + job = lima_job_get_with_fb(ctx, dst_surf, NULL); + + struct lima_resource *src_res = lima_resource(src_surf->texture); + struct lima_resource *dst_res = lima_resource(dst_surf->texture); + + lima_flush_job_accessing_bo(ctx, src_res->bo, true); + lima_flush_job_accessing_bo(ctx, dst_res->bo, true); + + lima_job_add_bo(job, LIMA_PIPE_PP, src_res->bo, LIMA_SUBMIT_BO_READ); + _mesa_hash_table_insert(ctx->write_jobs, &dst_res->base, job); + lima_job_add_bo(job, LIMA_PIPE_PP, dst_res->bo, LIMA_SUBMIT_BO_WRITE); + + if (info->src.resource->nr_samples > 1) { + for (int i = 0; i < MIN2(info->src.resource->nr_samples, LIMA_MAX_SAMPLES); i++) { + lima_pack_blit_cmd(job, &job->plbu_cmd_array, + src_surf, &info->src.box, + &info->dst.box, info->filter, true, + 1 << i, i); + } + } else { + lima_pack_blit_cmd(job, &job->plbu_cmd_array, + src_surf, &info->src.box, + &info->dst.box, info->filter, true, + 0xf, 0); + } + + bool tile_aligned = false; + + if (info->dst.box.x == 0 && info->dst.box.y == 0 && + info->dst.box.width == lima_dst_surf->base.width && + info->dst.box.height == lima_dst_surf->base.height) + tile_aligned = true; + + if (info->dst.box.x % 16 == 0 && info->dst.box.y % 16 == 0 && + info->dst.box.width % 16 == 0 && info->dst.box.height % 16 == 0) + tile_aligned = true; + + /* Reload if dest is not aligned to tile boundaries */ + if (!tile_aligned) + lima_dst_surf->reload = reload_flags; + else + lima_dst_surf->reload = 0; + + job->resolve = reload_flags; + + lima_do_job(job); + + pipe_surface_reference(&dst_surf, NULL); + pipe_surface_reference(&src_surf, NULL); + + return true; +} diff --git a/src/gallium/drivers/lima/lima_blit.h b/src/gallium/drivers/lima/lima_blit.h new file mode 100644 index 00000000000..54a404ec9a7 --- /dev/null +++ b/src/gallium/drivers/lima/lima_blit.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2022 Lima Project + * + * SPDX-License-Identifier: MIT + * + */ + +#ifndef H_LIMA_BLIT +#define H_LIMA_BLIT + +#include <stdbool.h> + +struct util_dynarray; + +void +lima_pack_blit_cmd(struct lima_job *job, + struct util_dynarray *cmd, + struct pipe_surface *psurf, + const struct pipe_box *src, + const struct pipe_box *dst, + unsigned filter, + bool scissor, + unsigned sample_mask, + unsigned mrt_idx); + +bool lima_do_blit(struct pipe_context *ctx, + const struct pipe_blit_info *blit_info); + +#endif + diff --git a/src/gallium/drivers/lima/lima_bo.c b/src/gallium/drivers/lima/lima_bo.c index 9e6d46b05de..e2daba76b69 100644 --- a/src/gallium/drivers/lima/lima_bo.c +++ b/src/gallium/drivers/lima/lima_bo.c @@ -32,7 +32,7 @@ #include "util/u_hash_table.h" #include "util/u_math.h" #include "util/os_time.h" -#include "os/os_mman.h" +#include "util/os_mman.h" #include "frontend/drm_driver.h" diff --git a/src/gallium/drivers/lima/lima_context.c b/src/gallium/drivers/lima/lima_context.c index e14f5ae0fb1..802f308f48e 100644 --- a/src/gallium/drivers/lima/lima_context.c +++ b/src/gallium/drivers/lima/lima_context.c @@ -29,6 +29,7 @@ #include "util/u_debug.h" #include "util/ralloc.h" #include "util/u_inlines.h" +#include "util/u_debug_cb.h" #include "util/hash_table.h" #include "lima_screen.h" @@ -138,13 +139,15 @@ lima_context_destroy(struct pipe_context *pctx) struct lima_context *ctx = lima_context(pctx); struct lima_screen *screen = lima_screen(pctx->screen); - lima_job_fini(ctx); + if (ctx->jobs) + lima_job_fini(ctx); for (int i = 0; i < lima_ctx_buff_num; i++) pipe_resource_reference(&ctx->buffer_state[i].res, NULL); lima_program_fini(ctx); lima_state_fini(ctx); + util_unreference_framebuffer_state(&ctx->framebuffer.base); if (ctx->blitter) util_blitter_destroy(ctx->blitter); @@ -187,25 +190,13 @@ plb_pp_stream_compare(const void *key1, const void *key2) return memcmp(key1, key2, sizeof(struct lima_ctx_plb_pp_stream_key)) == 0; } -static void -lima_set_debug_callback(struct pipe_context *pctx, - const struct pipe_debug_callback *cb) -{ - struct lima_context *ctx = lima_context(pctx); - - if (cb) - ctx->debug = *cb; - else - memset(&ctx->debug, 0, sizeof(ctx->debug)); -} - struct pipe_context * lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct lima_screen *screen = lima_screen(pscreen); struct lima_context *ctx; - ctx = rzalloc(screen, struct lima_context); + ctx = rzalloc(NULL, struct lima_context); if (!ctx) return NULL; @@ -215,9 +206,11 @@ lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; } + ctx->sample_mask = (1 << LIMA_MAX_SAMPLES) - 1; + ctx->base.screen = pscreen; ctx->base.destroy = lima_context_destroy; - ctx->base.set_debug_callback = lima_set_debug_callback; + ctx->base.set_debug_callback = u_default_set_debug_callback; ctx->base.invalidate_resource = lima_invalidate_resource; lima_resource_context_init(ctx); diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h index 86a668cb4dc..cd753660623 100644 --- a/src/gallium/drivers/lima/lima_context.h +++ b/src/gallium/drivers/lima/lima_context.h @@ -27,6 +27,7 @@ #include "util/list.h" #include "util/slab.h" +#include "util/u_debug.h" #include "pipe/p_context.h" #include "pipe/p_state.h" @@ -49,6 +50,9 @@ struct lima_fs_compiled_shader { struct { int shader_size; int stack_size; + int frag_color0_reg; + int frag_color1_reg; + int frag_depth_reg; bool uses_discard; } state; }; @@ -200,6 +204,7 @@ struct lima_context { LIMA_CONTEXT_DIRTY_CLIP = (1 << 15), LIMA_CONTEXT_DIRTY_UNCOMPILED_VS = (1 << 16), LIMA_CONTEXT_DIRTY_UNCOMPILED_FS = (1 << 17), + LIMA_CONTEXT_DIRTY_SAMPLE_MASK = (1 << 18), } dirty; struct u_upload_mgr *uploader; @@ -209,6 +214,8 @@ struct lima_context { struct lima_context_framebuffer framebuffer; struct lima_context_viewport_state viewport; + /* input for PLBU_CMD_VIEWPORT_* */ + struct lima_context_viewport_state ext_viewport; struct pipe_scissor_state scissor; struct pipe_scissor_state clipped_scissor; struct lima_vs_compiled_shader *vs; @@ -227,6 +234,9 @@ struct lima_context { struct lima_texture_stateobj tex_stateobj; struct lima_pp_stream_state pp_stream; + #define LIMA_MAX_SAMPLES 4 + unsigned sample_mask; + unsigned min_index; unsigned max_index; @@ -270,8 +280,6 @@ struct lima_context { int id; - struct pipe_debug_callback debug; - unsigned index_offset; struct lima_resource *index_res; }; diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c index 161fc7288a5..e64f1e88c1a 100644 --- a/src/gallium/drivers/lima/lima_draw.c +++ b/src/gallium/drivers/lima/lima_draw.c @@ -85,6 +85,32 @@ lima_clip_scissor_to_viewport(struct lima_context *ctx) cscissor->miny = cscissor->maxy; } +static void +lima_extend_viewport(struct lima_context *ctx, const struct pipe_draw_info *info) +{ + /* restore the original values */ + ctx->ext_viewport.left = ctx->viewport.left; + ctx->ext_viewport.right = ctx->viewport.right; + ctx->ext_viewport.bottom = ctx->viewport.bottom; + ctx->ext_viewport.top = ctx->viewport.top; + + if (info->mode != MESA_PRIM_LINES) + return; + + if (!ctx->rasterizer) + return; + + float line_width = ctx->rasterizer->base.line_width; + + if (line_width == 1.0f) + return; + + ctx->ext_viewport.left = ctx->viewport.left - line_width / 2; + ctx->ext_viewport.right = ctx->viewport.right + line_width / 2; + ctx->ext_viewport.bottom = ctx->viewport.bottom - line_width / 2; + ctx->ext_viewport.top = ctx->viewport.top + line_width / 2; +} + static bool lima_is_scissor_zero(struct lima_context *ctx) { @@ -121,17 +147,6 @@ lima_update_job_wb(struct lima_context *ctx, unsigned buffers) } static void -lima_damage_rect_union(struct pipe_scissor_state *rect, - unsigned minx, unsigned maxx, - unsigned miny, unsigned maxy) -{ - rect->minx = MIN2(rect->minx, minx); - rect->miny = MIN2(rect->miny, miny); - rect->maxx = MAX2(rect->maxx, maxx); - rect->maxy = MAX2(rect->maxy, maxy); -} - -static void lima_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, const union pipe_color_union *color, double depth, unsigned stencil) { @@ -327,10 +342,10 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info, struct lima_job *job = lima_job_get(ctx); PLBU_CMD_BEGIN(&job->plbu_cmd_array, 32); - PLBU_CMD_VIEWPORT_LEFT(fui(ctx->viewport.left)); - PLBU_CMD_VIEWPORT_RIGHT(fui(ctx->viewport.right)); - PLBU_CMD_VIEWPORT_BOTTOM(fui(ctx->viewport.bottom)); - PLBU_CMD_VIEWPORT_TOP(fui(ctx->viewport.top)); + PLBU_CMD_VIEWPORT_LEFT(fui(ctx->ext_viewport.left)); + PLBU_CMD_VIEWPORT_RIGHT(fui(ctx->ext_viewport.right)); + PLBU_CMD_VIEWPORT_BOTTOM(fui(ctx->ext_viewport.bottom)); + PLBU_CMD_VIEWPORT_TOP(fui(ctx->ext_viewport.top)); if (!info->index_size) PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN(); @@ -348,11 +363,11 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info, } /* Specify point size with PLBU command if shader doesn't write */ - if (info->mode == PIPE_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) + if (info->mode == MESA_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) force_point_size = true; /* Specify line width with PLBU command for lines */ - if (info->mode > PIPE_PRIM_POINTS && info->mode < PIPE_PRIM_TRIANGLES) + if (info->mode > MESA_PRIM_POINTS && info->mode < MESA_PRIM_TRIANGLES) force_point_size = true; PLBU_CMD_PRIMITIVE_SETUP(force_point_size, cull, info->index_size); @@ -377,10 +392,10 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info, PLBU_CMD_DEPTH_RANGE_NEAR(fui(ctx->viewport.near)); PLBU_CMD_DEPTH_RANGE_FAR(fui(ctx->viewport.far)); - if ((info->mode == PIPE_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) || - ((info->mode >= PIPE_PRIM_LINES) && (info->mode < PIPE_PRIM_TRIANGLES))) + if ((info->mode == MESA_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) || + ((info->mode >= MESA_PRIM_LINES) && (info->mode < MESA_PRIM_TRIANGLES))) { - uint32_t v = info->mode == PIPE_PRIM_POINTS ? + uint32_t v = info->mode == MESA_PRIM_POINTS ? fui(ctx->rasterizer->base.point_size) : fui(ctx->rasterizer->base.line_width); PLBU_CMD_LOW_PRIM_SIZE(v); } @@ -424,106 +439,55 @@ lima_blend_func(enum pipe_blend_func pipe) } static int -lima_blend_factor_has_alpha(enum pipe_blendfactor pipe) -{ - /* Bit 4 is set if the blendfactor uses alpha */ - switch (pipe) { - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return 1; - - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_ONE: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 0; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ - } - return -1; -} - -static int -lima_blend_factor_is_inv(enum pipe_blendfactor pipe) -{ - /* Bit 3 is set if the blendfactor type is inverted */ - switch (pipe) { - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - case PIPE_BLENDFACTOR_INV_DST_COLOR: - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - case PIPE_BLENDFACTOR_ONE: - return 1; - - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_SRC_ALPHA: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_DST_ALPHA: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_CONST_ALPHA: - case PIPE_BLENDFACTOR_ZERO: - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 0; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ - } - return -1; -} - -static int lima_blend_factor(enum pipe_blendfactor pipe) { - /* Bits 0-2 indicate the blendfactor type */ + /* Bits 0-2 indicate the blendfactor type, + * Bit 3 is set if blendfactor is inverted + * Bit 4 is set if blendfactor has alpha */ switch (pipe) { case PIPE_BLENDFACTOR_SRC_COLOR: + return 0 << 4 | 0 << 3 | 0; case PIPE_BLENDFACTOR_SRC_ALPHA: + return 1 << 4 | 0 << 3 | 0; case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 0 << 4 | 1 << 3 | 0; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return 0; + return 1 << 4 | 1 << 3 | 0; case PIPE_BLENDFACTOR_DST_COLOR: + return 0 << 4 | 0 << 3 | 1; case PIPE_BLENDFACTOR_DST_ALPHA: + return 1 << 4 | 0 << 3 | 1; case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 0 << 4 | 1 << 3 | 1; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return 1; + return 1 << 4 | 1 << 3 | 1; case PIPE_BLENDFACTOR_CONST_COLOR: + return 0 << 4 | 0 << 3 | 2; case PIPE_BLENDFACTOR_CONST_ALPHA: + return 1 << 4 | 0 << 3 | 2; case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 0 << 4 | 1 << 3 | 2; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return 2; + return 1 << 4 | 1 << 3 | 2; case PIPE_BLENDFACTOR_ZERO: + return 0 << 4 | 0 << 3 | 3; case PIPE_BLENDFACTOR_ONE: - return 3; + return 0 << 4 | 1 << 3 | 3; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return 4; + return 0 << 4 | 0 << 3 | 4; case PIPE_BLENDFACTOR_SRC1_COLOR: + return 0 << 4 | 0 << 3 | 5; case PIPE_BLENDFACTOR_SRC1_ALPHA: + return 1 << 4 | 0 << 3 | 5; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return 0 << 4 | 1 << 3 | 5; case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - return -1; /* not supported */ + return 1 << 4 | 1 << 3 | 5; } return -1; } @@ -534,27 +498,37 @@ lima_calculate_alpha_blend(enum pipe_blend_func rgb_func, enum pipe_blend_func a enum pipe_blendfactor alpha_src_factor, enum pipe_blendfactor alpha_dst_factor) { /* PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE has to be changed to PIPE_BLENDFACTOR_ONE - * if it is set for alpha_src. + * if it is set for alpha_src or alpha_dst. */ if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) alpha_src_factor = PIPE_BLENDFACTOR_ONE; + if (alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + + /* MIN and MAX ops actually do OP(As * S + Ad * D, Ad), so + * we need to set S to 1 and D to 0 to get correct result */ + if (alpha_func == PIPE_BLEND_MIN || + alpha_func == PIPE_BLEND_MAX) { + alpha_src_factor = PIPE_BLENDFACTOR_ONE; + alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + } + + /* MIN and MAX ops actually do OP(Cs * S + Cd * D, Cd), so + * we need to set S to 1 and D to 0 to get correct result */ + if (rgb_func == PIPE_BLEND_MIN || + rgb_func == PIPE_BLEND_MAX) { + rgb_src_factor = PIPE_BLENDFACTOR_ONE; + rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + } + return lima_blend_func(rgb_func) | (lima_blend_func(alpha_func) << 3) | - (lima_blend_factor(rgb_src_factor) << 6) | - (lima_blend_factor_is_inv(rgb_src_factor) << 9) | - (lima_blend_factor_has_alpha(rgb_src_factor) << 10) | - (lima_blend_factor(rgb_dst_factor) << 11) | - (lima_blend_factor_is_inv(rgb_dst_factor) << 14) | - (lima_blend_factor_has_alpha(rgb_dst_factor) << 15) | - - (lima_blend_factor(alpha_src_factor) << 16) | - (lima_blend_factor_is_inv(alpha_src_factor) << 19) | - - (lima_blend_factor(alpha_dst_factor) << 20) | - (lima_blend_factor_is_inv(alpha_dst_factor) << 23) | + /* alpha_src and alpha_dst are 4 bit, so need to mask 5th bit */ + ((lima_blend_factor(alpha_src_factor) & 0xf) << 16) | + ((lima_blend_factor(alpha_dst_factor) & 0xf) << 20) | 0x0C000000; /* need to check if this is GLESv1 glAlphaFunc */ } @@ -600,8 +574,7 @@ lima_calculate_depth_test(struct pipe_depth_stencil_alpha_state *depth, return (depth->depth_enabled && depth->depth_writemask) | ((int)func << 1) | (offset_scale << 16) | - (offset_units << 24) | - 0x30; /* find out what is this */ + (offset_units << 24); } static void @@ -647,20 +620,22 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in struct pipe_rasterizer_state *rst = &ctx->rasterizer->base; render->depth_test = lima_calculate_depth_test(&ctx->zsa->base, rst); - ushort far, near; + if (!rst->depth_clip_near || ctx->viewport.near == 0.0f) + render->depth_test |= 0x10; /* don't clip depth near */ + if (!rst->depth_clip_far || ctx->viewport.far == 1.0f) + render->depth_test |= 0x20; /* don't clip depth far */ + + if (fs->state.frag_depth_reg != -1) { + render->depth_test |= (fs->state.frag_depth_reg << 6); + /* Shader writes depth */ + render->depth_test |= 0x801; + } + + uint16_t far, near; near = float_to_ushort(ctx->viewport.near); far = float_to_ushort(ctx->viewport.far); - /* Insert a small 'epsilon' difference between 'near' and 'far' when - * they are equal, to avoid application bugs. */ - if (far == near) { - if (near > 0) - near--; - if (far < USHRT_MAX) - far++; - } - /* overlap with plbu? any place can remove one? */ render->depth_range = near | (far << 16); @@ -699,14 +674,25 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in } /* need more investigation */ - if (info->mode == PIPE_PRIM_POINTS) - render->multi_sample = 0x0000F000; - else if (info->mode < PIPE_PRIM_TRIANGLES) - render->multi_sample = 0x0000F400; + if (info->mode == MESA_PRIM_POINTS) + render->multi_sample = 0x00000000; + else if (info->mode < MESA_PRIM_TRIANGLES) + render->multi_sample = 0x00000400; else - render->multi_sample = 0x0000F800; + render->multi_sample = 0x00000800; if (ctx->framebuffer.base.samples) render->multi_sample |= 0x68; + if (ctx->blend->base.alpha_to_coverage) + render->multi_sample |= (1 << 7); + if (ctx->blend->base.alpha_to_one) + render->multi_sample |= (1 << 8); + render->multi_sample |= (ctx->sample_mask << 12); + + /* Set gl_FragColor register, need to specify it 4 times */ + render->multi_sample |= (fs->state.frag_color0_reg << 28) | + (fs->state.frag_color0_reg << 24) | + (fs->state.frag_color0_reg << 20) | + (fs->state.frag_color0_reg << 16); /* alpha test */ if (ctx->zsa->base.alpha_enabled) { @@ -726,12 +712,17 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in render->textures_address = 0x00000000; render->aux0 = (ctx->vs->state.varying_stride >> 3); - render->aux1 = 0x00001000; + render->aux1 = 0x00000000; + if (ctx->rasterizer->base.front_ccw) + render->aux1 = 0x00001000; + if (ctx->blend->base.dither) render->aux1 |= 0x00002000; if (fs->state.uses_discard || - ctx->zsa->base.alpha_enabled) { + ctx->zsa->base.alpha_enabled || + fs->state.frag_depth_reg != -1 || + ctx->blend->base.alpha_to_coverage) { early_z = false; pixel_kill = false; } @@ -770,6 +761,10 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in render->aux1 |= 0x10000; } + /* Set secondary output color */ + if (fs->state.frag_color1_reg != -1) + render->aux0 |= (fs->state.frag_color1_reg << 28); + if (ctx->vs->state.num_varyings) { render->varying_types = 0x00000000; render->varyings_address = ctx->gp_output->va + @@ -842,8 +837,8 @@ lima_update_gp_attribute_info(struct lima_context *ctx, const struct pipe_draw_i unsigned start = info->index_size ? (ctx->min_index + draw->index_bias) : draw->start; attribute[n++] = res->bo->va + pvb->buffer_offset + pve->src_offset - + start * pvb->stride; - attribute[n++] = (pvb->stride << 11) | + + start * pve->src_stride; + attribute[n++] = (pve->src_stride << 11) | (lima_pipe_format_to_attrib_type(pve->src_format) << 2) | (util_format_get_nr_components(pve->src_format) - 1); } @@ -1173,6 +1168,10 @@ lima_draw_vbo(struct pipe_context *pctx, if (lima_is_scissor_zero(ctx)) return; + /* extend the viewport in case of line draws with a line_width > 1.0f, + * otherwise use the original values */ + lima_extend_viewport(ctx, info); + if (!lima_update_fs_state(ctx) || !lima_update_vs_state(ctx)) return; @@ -1182,10 +1181,12 @@ lima_draw_vbo(struct pipe_context *pctx, lima_dump_command_stream_print( job->dump, ctx->vs->bo->map, ctx->vs->state.shader_size, false, "add vs at va %x\n", ctx->vs->bo->va); + lima_dump_shader(job->dump, ctx->vs->bo->map, ctx->vs->state.shader_size, false); lima_dump_command_stream_print( job->dump, ctx->fs->bo->map, ctx->fs->state.shader_size, false, "add fs at va %x\n", ctx->fs->bo->va); + lima_dump_shader(job->dump, ctx->fs->bo->map, ctx->fs->state.shader_size, true); lima_job_add_bo(job, LIMA_PIPE_GP, ctx->vs->bo, LIMA_SUBMIT_BO_READ); lima_job_add_bo(job, LIMA_PIPE_PP, ctx->fs->bo, LIMA_SUBMIT_BO_READ); @@ -1201,7 +1202,6 @@ lima_draw_vbo(struct pipe_context *pctx, if (job->draws > MAX_DRAWS_PER_JOB) { unsigned resolve = job->resolve; lima_do_job(job); - job = lima_job_get(ctx); /* Subsequent job will need to resolve the same buffers */ lima_update_job_wb(ctx, resolve); } diff --git a/src/gallium/drivers/lima/lima_format.h b/src/gallium/drivers/lima/lima_format.h index fb472641d6e..781f77e38e9 100644 --- a/src/gallium/drivers/lima/lima_format.h +++ b/src/gallium/drivers/lima/lima_format.h @@ -26,7 +26,7 @@ #include <stdbool.h> -#include <pipe/p_format.h> +#include <util/format/u_formats.h> bool lima_format_texel_supported(enum pipe_format f); bool lima_format_pixel_supported(enum pipe_format f); diff --git a/src/gallium/drivers/lima/lima_gpu.h b/src/gallium/drivers/lima/lima_gpu.h index 4d35640592f..4e3ea65d026 100644 --- a/src/gallium/drivers/lima/lima_gpu.h +++ b/src/gallium/drivers/lima/lima_gpu.h @@ -73,9 +73,9 @@ struct lima_pp_wb_reg { uint32_t downsample_factor; uint32_t pixel_layout; uint32_t pitch; + uint32_t flags; uint32_t mrt_bits; uint32_t mrt_pitch; - uint32_t zero; uint32_t unused0; uint32_t unused1; uint32_t unused2; @@ -120,11 +120,15 @@ struct lima_render_state { plbu_cmd[i++] = v2; \ } while (0) +#define PLBU_BLOCK_W_MASK 0xff +#define PLBU_BLOCK_H_MASK 0xff + #define PLBU_CMD_BLOCK_STEP(shift_min, shift_h, shift_w) \ PLBU_CMD(((shift_min) << 28) | ((shift_h) << 16) | (shift_w), 0x1000010C) #define PLBU_CMD_TILED_DIMENSIONS(tiled_w, tiled_h) \ PLBU_CMD((((tiled_w) - 1) << 24) | (((tiled_h) - 1) << 8), 0x10000109) -#define PLBU_CMD_BLOCK_STRIDE(block_w) PLBU_CMD((block_w) & 0xff, 0x30000000) +#define PLBU_CMD_BLOCK_STRIDE(block_w) \ + PLBU_CMD((block_w) & PLBU_BLOCK_W_MASK, 0x30000000) #define PLBU_CMD_ARRAY_ADDRESS(gp_stream, block_num) \ PLBU_CMD(gp_stream, 0x28000000 | ((block_num) - 1) | 1) #define PLBU_CMD_VIEWPORT_LEFT(v) PLBU_CMD(v, 0x10000107) diff --git a/src/gallium/drivers/lima/lima_job.c b/src/gallium/drivers/lima/lima_job.c index ef8a6444cb9..6400fdb2dd9 100644 --- a/src/gallium/drivers/lima/lima_job.c +++ b/src/gallium/drivers/lima/lima_job.c @@ -34,6 +34,7 @@ #include "util/format/u_format.h" #include "util/u_upload_mgr.h" #include "util/u_inlines.h" +#include "util/u_framebuffer.h" #include "lima_screen.h" #include "lima_context.h" @@ -45,6 +46,7 @@ #include "lima_texture.h" #include "lima_fence.h" #include "lima_gpu.h" +#include "lima_blit.h" #define VOID2U64(x) ((uint64_t)(unsigned long)(x)) @@ -53,9 +55,19 @@ lima_get_fb_info(struct lima_job *job) { struct lima_context *ctx = job->ctx; struct lima_job_fb_info *fb = &job->fb; + struct lima_surface *surf = lima_surface(job->key.cbuf); + + if (!surf) + surf = lima_surface(job->key.zsbuf); - fb->width = ctx->framebuffer.base.width; - fb->height = ctx->framebuffer.base.height; + if (!surf) { + /* We don't have neither cbuf nor zsbuf, use dimensions from ctx */ + fb->width = ctx->framebuffer.base.width; + fb->height = ctx->framebuffer.base.height; + } else { + fb->width = surf->base.width; + fb->height = surf->base.height; + } int width = align(fb->width, 16) >> 4; int height = align(fb->height, 16) >> 4; @@ -69,8 +81,9 @@ lima_get_fb_info(struct lima_job *job) fb->shift_w = 0; int limit = screen->plb_max_blk; - while ((width * height) > limit) { - if (width >= height) { + while ((width * height) > limit || + width > PLBU_BLOCK_W_MASK || height > PLBU_BLOCK_H_MASK) { + if (width >= height || width > PLBU_BLOCK_W_MASK) { width = (width + 1) >> 1; fb->shift_w++; } else { @@ -86,7 +99,9 @@ lima_get_fb_info(struct lima_job *job) } static struct lima_job * -lima_job_create(struct lima_context *ctx) +lima_job_create(struct lima_context *ctx, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf) { struct lima_job *s; @@ -112,9 +127,8 @@ lima_job_create(struct lima_context *ctx) util_dynarray_init(&s->plbu_cmd_array, s); util_dynarray_init(&s->plbu_cmd_head, s); - struct lima_context_framebuffer *fb = &ctx->framebuffer; - pipe_surface_reference(&s->key.cbuf, fb->base.cbufs[0]); - pipe_surface_reference(&s->key.zsbuf, fb->base.zsbuf); + pipe_surface_reference(&s->key.cbuf, cbuf); + pipe_surface_reference(&s->key.zsbuf, zsbuf); lima_get_fb_info(s); @@ -145,20 +159,21 @@ lima_job_free(struct lima_job *job) ralloc_free(job); } -static struct lima_job * -_lima_job_get(struct lima_context *ctx) +struct lima_job * +lima_job_get_with_fb(struct lima_context *ctx, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf) { - struct lima_context_framebuffer *fb = &ctx->framebuffer; struct lima_job_key local_key = { - .cbuf = fb->base.cbufs[0], - .zsbuf = fb->base.zsbuf, + .cbuf = cbuf, + .zsbuf = zsbuf, }; struct hash_entry *entry = _mesa_hash_table_search(ctx->jobs, &local_key); if (entry) return entry->data; - struct lima_job *job = lima_job_create(ctx); + struct lima_job *job = lima_job_create(ctx, cbuf, zsbuf); if (!job) return NULL; @@ -167,6 +182,14 @@ _lima_job_get(struct lima_context *ctx) return job; } +static struct lima_job * +_lima_job_get(struct lima_context *ctx) +{ + struct lima_context_framebuffer *fb = &ctx->framebuffer; + + return lima_job_get_with_fb(ctx, fb->base.cbufs[0], fb->base.zsbuf); +} + /* * Note: this function can only be called in draw code path, * must not exist in flush code path. @@ -337,112 +360,35 @@ lima_fb_zsbuf_needs_reload(struct lima_job *job) static void lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf) { - #define lima_reload_render_state_offset 0x0000 - #define lima_reload_gl_pos_offset 0x0040 - #define lima_reload_varying_offset 0x0080 - #define lima_reload_tex_desc_offset 0x00c0 - #define lima_reload_tex_array_offset 0x0100 - #define lima_reload_buffer_size 0x0140 - + struct lima_job_fb_info *fb = &job->fb; struct lima_context *ctx = job->ctx; - struct lima_surface *surf = lima_surface(psurf); - int level = psurf->u.tex.level; - unsigned first_layer = psurf->u.tex.first_layer; - - uint32_t va; - void *cpu = lima_job_create_stream_bo( - job, LIMA_PIPE_PP, lima_reload_buffer_size, &va); - - struct lima_screen *screen = lima_screen(ctx->base.screen); + struct pipe_box src = { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + }; - uint32_t reload_shader_first_instr_size = - ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f; - uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset; - - struct lima_render_state reload_render_state = { - .alpha_blend = 0xf03b1ad2, - .depth_test = 0x0000000e, - .depth_range = 0xffff0000, - .stencil_front = 0x00000007, - .stencil_back = 0x00000007, - .multi_sample = 0x0000f007, - .shader_address = reload_shader_va | reload_shader_first_instr_size, - .varying_types = 0x00000001, - .textures_address = va + lima_reload_tex_array_offset, - .aux0 = 0x00004021, - .varyings_address = va + lima_reload_varying_offset, + struct pipe_box dst = { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, }; - if (util_format_is_depth_or_stencil(psurf->format)) { - reload_render_state.alpha_blend &= 0x0fffffff; - if (psurf->format != PIPE_FORMAT_Z16_UNORM) - reload_render_state.depth_test |= 0x400; - if (surf->reload & PIPE_CLEAR_DEPTH) - reload_render_state.depth_test |= 0x801; - if (surf->reload & PIPE_CLEAR_STENCIL) { - reload_render_state.depth_test |= 0x1000; - reload_render_state.stencil_front = 0x0000024f; - reload_render_state.stencil_back = 0x0000024f; - reload_render_state.stencil_test = 0x0000ffff; + if (ctx->framebuffer.base.samples > 1) { + for (int i = 0; i < LIMA_MAX_SAMPLES; i++) { + lima_pack_blit_cmd(job, &job->plbu_cmd_head, + psurf, &src, &dst, + PIPE_TEX_FILTER_NEAREST, false, + (1 << i), i); } + } else { + lima_pack_blit_cmd(job, &job->plbu_cmd_head, + psurf, &src, &dst, + PIPE_TEX_FILTER_NEAREST, false, + 0xf, 0); } - - memcpy(cpu + lima_reload_render_state_offset, &reload_render_state, - sizeof(reload_render_state)); - - lima_tex_desc *td = cpu + lima_reload_tex_desc_offset; - memset(td, 0, lima_min_tex_desc_size); - lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, first_layer); - td->format = lima_format_get_texel_reload(psurf->format); - td->unnorm_coords = 1; - td->texture_type = LIMA_TEXTURE_TYPE_2D; - td->min_img_filter_nearest = 1; - td->mag_img_filter_nearest = 1; - td->wrap_s_clamp_to_edge = 1; - td->wrap_t_clamp_to_edge = 1; - td->unknown_2_2 = 0x1; - - uint32_t *ta = cpu + lima_reload_tex_array_offset; - ta[0] = va + lima_reload_tex_desc_offset; - - struct lima_job_fb_info *fb = &job->fb; - float reload_gl_pos[] = { - fb->width, 0, 0, 1, - 0, 0, 0, 1, - 0, fb->height, 0, 1, - }; - memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos, - sizeof(reload_gl_pos)); - - float reload_varying[] = { - fb->width, 0, 0, 0, - 0, fb->height, 0, 0, - }; - memcpy(cpu + lima_reload_varying_offset, reload_varying, - sizeof(reload_varying)); - - PLBU_CMD_BEGIN(&job->plbu_cmd_head, 20); - - PLBU_CMD_VIEWPORT_LEFT(0); - PLBU_CMD_VIEWPORT_RIGHT(fui(fb->width)); - PLBU_CMD_VIEWPORT_BOTTOM(0); - PLBU_CMD_VIEWPORT_TOP(fui(fb->height)); - - PLBU_CMD_RSW_VERTEX_ARRAY( - va + lima_reload_render_state_offset, - va + lima_reload_gl_pos_offset); - - PLBU_CMD_UNKNOWN2(); - PLBU_CMD_UNKNOWN1(); - - PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset); - PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset); - PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3); - - PLBU_CMD_END(); - - lima_dump_command_stream_print(job->dump, cpu, lima_reload_buffer_size, - false, "reload plbu cmd at va %x\n", va); } static void @@ -453,6 +399,9 @@ lima_pack_head_plbu_cmd(struct lima_job *job) PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10); + assert((fb->block_w & PLBU_BLOCK_W_MASK) == fb->block_w); + assert((fb->block_h & PLBU_BLOCK_H_MASK) == fb->block_h); + PLBU_CMD_UNKNOWN2(); PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w); PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h); @@ -464,8 +413,9 @@ lima_pack_head_plbu_cmd(struct lima_job *job) PLBU_CMD_END(); - if (lima_fb_cbuf_needs_reload(job)) + if (lima_fb_cbuf_needs_reload(job)) { lima_pack_reload_plbu_cmd(job, job->key.cbuf); + } if (lima_fb_zsbuf_needs_reload(job)) lima_pack_reload_plbu_cmd(job, job->key.zsbuf); @@ -543,7 +493,8 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y, struct lima_pp_stream_state *ps = &ctx->pp_stream; struct lima_job_fb_info *fb = &job->fb; struct lima_screen *screen = lima_screen(ctx->base.screen); - int i, num_pp = screen->num_pp; + int num_pp = screen->num_pp; + assert(num_pp > 0); /* use hilbert_coords to generates 1D to 2D relationship. * 1D for pp stream index and 2D for plb block x/y on framebuffer. @@ -565,10 +516,10 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y, count = 1 << (dim + dim); } - for (i = 0; i < num_pp; i++) + for (int i = 0; i < num_pp; i++) stream[i] = ps->map + ps->offset[i]; - for (i = 0; i < count; i++) { + for (int i = 0; i < count; i++) { int x, y; hilbert_coords(max, i, &x, &y); if (x < tiled_w && y < tiled_h) { @@ -589,7 +540,7 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y, } } - for (i = 0; i < num_pp; i++) { + for (int i = 0; i < num_pp; i++) { stream[i][si[i]++] = 0; stream[i][si[i]++] = 0xBC000000; stream[i][si[i]++] = 0; @@ -800,7 +751,13 @@ lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx) wb[wb_idx].pixel_layout = 0x0; wb[wb_idx].pitch = res->levels[level].stride / 8; } - wb[wb_idx].mrt_bits = 0; + wb[wb_idx].flags = 0; + unsigned nr_samples = zsbuf->nr_samples ? + zsbuf->nr_samples : MAX2(1, zsbuf->texture->nr_samples); + if (nr_samples > 1) { + wb[wb_idx].mrt_pitch = res->mrt_pitch; + wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples); + } } static void @@ -829,7 +786,13 @@ lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg, wb[wb_idx].pixel_layout = 0x0; wb[wb_idx].pitch = res->levels[level].stride / 8; } - wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0; + wb[wb_idx].flags = swap_channels ? 0x4 : 0x0; + unsigned nr_samples = cbuf->nr_samples ? + cbuf->nr_samples : MAX2(1, cbuf->texture->nr_samples); + if (nr_samples > 1) { + wb[wb_idx].mrt_pitch = res->mrt_pitch; + wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples); + } } static void @@ -949,7 +912,7 @@ lima_do_job(struct lima_job *job) fprintf(stderr, "gp job error\n"); if (job->dump) { - if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) { + if (lima_job_wait(job, LIMA_PIPE_GP, OS_TIMEOUT_INFINITE)) { if (ctx->gp_output) { float *pos = lima_bo_map(ctx->gp_output); lima_dump_command_stream_print( @@ -1030,7 +993,7 @@ lima_do_job(struct lima_job *job) } if (job->dump) { - if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) { + if (!lima_job_wait(job, LIMA_PIPE_PP, OS_TIMEOUT_INFINITE)) { fprintf(stderr, "pp wait error\n"); exit(1); } @@ -1111,6 +1074,14 @@ lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, } } +static void +lima_texture_barrier(struct pipe_context *pctx, unsigned flags) +{ + struct lima_context *ctx = lima_context(pctx); + + lima_flush(ctx); +} + static bool lima_job_compare(const void *s1, const void *s2) { @@ -1145,6 +1116,7 @@ bool lima_job_init(struct lima_context *ctx) } ctx->base.flush = lima_pipe_flush; + ctx->base.texture_barrier = lima_texture_barrier; return true; } diff --git a/src/gallium/drivers/lima/lima_job.h b/src/gallium/drivers/lima/lima_job.h index a43b8be1c10..0eb05a5378c 100644 --- a/src/gallium/drivers/lima/lima_job.h +++ b/src/gallium/drivers/lima/lima_job.h @@ -95,6 +95,9 @@ lima_job_has_draw_pending(struct lima_job *job) } struct lima_job *lima_job_get(struct lima_context *ctx); +struct lima_job * lima_job_get_with_fb(struct lima_context *ctx, + struct pipe_surface *cbuf, + struct pipe_surface *zsbuf); bool lima_job_add_bo(struct lima_job *job, int pipe, struct lima_bo *bo, uint32_t flags); diff --git a/src/gallium/drivers/lima/lima_parser.c b/src/gallium/drivers/lima/lima_parser.c index b0a6c86a9e5..da8379975da 100644 --- a/src/gallium/drivers/lima/lima_parser.c +++ b/src/gallium/drivers/lima/lima_parser.c @@ -32,6 +32,9 @@ #include "lima_parser.h" #include "lima_texture.h" +#include "lima/ir/gp/codegen.h" +#include "lima/ir/pp/codegen.h" + typedef struct { char *info; } render_state_info; @@ -433,6 +436,35 @@ lima_parse_plbu(FILE *fp, uint32_t *data, int size, uint32_t start) fprintf(fp, "\n"); } +void +lima_parse_shader(FILE *fp, uint32_t *data, int size, bool is_frag) +{ + uint32_t *value = &data[0]; + + if (is_frag) { + uint32_t *bin = value; + uint32_t offt = 0; + uint32_t next_instr_length = 0; + + fprintf(fp, "/* ============ FS DISASSEMBLY BEGIN ============== */\n"); + + do { + ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *)bin; + fprintf(fp, "@%6d: ", offt); + ppir_disassemble_instr(bin, offt, fp); + bin += ctrl->count; + offt += ctrl->count; + next_instr_length = ctrl->next_count; + } while (next_instr_length); + + fprintf(fp, "/* ============ FS DISASSEMBLY END ================= */\n"); + } else { + fprintf(fp, "/* ============ VS DISASSEMBLY BEGIN ============== */\n"); + gpir_disassemble_program((gpir_codegen_instr *)value, size / sizeof(gpir_codegen_instr), fp); + fprintf(fp, "/* ============ VS DISASSEMBLY END ================= */\n"); + } +} + static void parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper) { @@ -489,7 +521,11 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper) if (*value & 0x1000) fprintf(fp, ", shader writes stencil"); fprintf(fp, " */\n\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info); - fprintf(fp, ": unknown bits 4-9: 0x%08x", *value & 0x000003f0); + if ((*value & 0x00000010) == 0x00000010) + fprintf(fp, ": ignore depth clip near"); + if ((*value & 0x00000020) == 0x00000020) + fprintf(fp, ", ignore depth clip far"); + fprintf(fp, ", register for gl_FragDepth: $%d", (*value & 0x000003c0) >> 6); fprintf(fp, ", unknown bits 13-15: 0x%08x */\n", *value & 0x00000e000); break; case 4: /* DEPTH RANGE */ @@ -553,12 +589,27 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper) fprintf(fp, ": unknown"); if ((*value & 0x00000078) == 0x00000068) - fprintf(fp, ", fb_samples */\n"); + fprintf(fp, ", msaa */\n"); else if ((*value & 0x00000078) == 0x00000000) fprintf(fp, " */\n"); else - fprintf(fp, ", UNKNOWN\n"); - fprintf(fp, "\t\t\t\t\t\t/* %s(2)", render_state_infos[i].info); + fprintf(fp, ", UNKNOWN */\n"); + + fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info); + fprintf(fp, ": sample_mask: 0x%.x", ((*value & 0xf000) >> 12)); + if ((*value & (1 << 7))) + fprintf(fp, ", alpha_to_coverage"); + if ((*value & (1 << 8))) + fprintf(fp, ", alpha_to_one"); + fprintf(fp, " */\n"); + + fprintf(fp, "\t\t\t\t\t\t/* %s(4)", render_state_infos[i].info); + fprintf(fp, ", register for gl_FragColor: $%d $%d $%d $%d */\n", + (*value & 0xf0000000) >> 28, + (*value & 0x0f000000) >> 24, + (*value & 0x00f00000) >> 20, + (*value & 0x000f0000) >> 16); + fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info); fprintf(fp, ": alpha_test_func: %d (%s) */\n", (*value & 0x00000007), lima_get_compare_func_string((*value & 0x00000007))); /* alpha_test_func */ @@ -631,12 +682,23 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper) if ((*value & 0x00002000) == 0x00002000) /* bit 13 unknown */ fprintf(fp, ", bit 13 set"); + + fprintf(fp, " */\n"); + fprintf(fp, "\n\t\t\t\t\t\t/* %s(3):", render_state_infos[i].info); + fprintf(fp, " register for gl_SecondaryFragColor: $%d", + (*value & 0xf0000000) >> 28); fprintf(fp, " */\n"); break; case 14: /* AUX1 */ fprintf(fp, ": "); if ((*value & 0x00002000) == 0x00002000) fprintf(fp, "blend->base.dither true, "); + + if ((*value & 0x00001000) == 0x00001000) + fprintf(fp, "glFrontFace(GL_CCW), "); + else + fprintf(fp, "glFrontFace(GL_CW), "); + if ((*value & 0x00010000) == 0x00010000) fprintf(fp, "ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer true "); fprintf(fp, "*/\n"); @@ -686,14 +748,16 @@ parse_texture(FILE *fp, uint32_t *data, uint32_t start, uint32_t offset) fprintf(fp, "\t stride: 0x%x (%d)\n", desc->stride, desc->stride); fprintf(fp, "\t unknown_0_2: 0x%x (%d)\n", desc->unknown_0_2, desc->unknown_0_2); - /* Word 1 - 3 */ - fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x 0x%08x 0x%08x\n", - start + i * 4, i * 4, *(&data[i + offset]), *(&data[i + 1 + offset]), *(&data[i + 2 + offset])); - i += 3; + /* Word 1 - 5 */ + fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + start + i * 4, i * 4, *(&data[i + offset]), *(&data[i + 1 + offset]), + *(&data[i + 2 + offset]), *(&data[i + 3 + offset]), *(&data[i + 4 + offset])); + i += 5; fprintf(fp, "\t unknown_1_1: 0x%x (%d)\n", desc->unknown_1_1, desc->unknown_1_1); fprintf(fp, "\t unnorm_coords: 0x%x (%d)\n", desc->unnorm_coords, desc->unnorm_coords); fprintf(fp, "\t unknown_1_2: 0x%x (%d)\n", desc->unknown_1_2, desc->unknown_1_2); - fprintf(fp, "\t texture_type: 0x%x (%d)\n", desc->texture_type, desc->texture_type); + fprintf(fp, "\t cube_map: 0x%x (%d)\n", desc->cube_map, desc->cube_map); + fprintf(fp, "\t sampler_dim: 0x%x (%d)\n", desc->sampler_dim, desc->sampler_dim); fprintf(fp, "\t min_lod: 0x%x (%d) (%f)\n", desc->min_lod, desc->min_lod, lima_fixed8_to_float(desc->min_lod)); fprintf(fp, "\t max_lod: 0x%x (%d) (%f)\n", desc->max_lod, desc->max_lod, lima_fixed8_to_float(desc->max_lod)); fprintf(fp, "\t lod_bias: 0x%x (%d) (%f)\n", desc->lod_bias, desc->lod_bias, lima_fixed8_to_float(desc->lod_bias)); @@ -702,29 +766,20 @@ parse_texture(FILE *fp, uint32_t *data, uint32_t start, uint32_t offset) fprintf(fp, "\t min_mipfilter_2: 0x%x (%d)\n", desc->min_mipfilter_2, desc->min_mipfilter_2); fprintf(fp, "\t min_img_filter_nearest: 0x%x (%d)\n", desc->min_img_filter_nearest, desc->min_img_filter_nearest); fprintf(fp, "\t mag_img_filter_nearest: 0x%x (%d)\n", desc->mag_img_filter_nearest, desc->mag_img_filter_nearest); - fprintf(fp, "\t wrap_s_clamp_to_edge: 0x%x (%d)\n", desc->wrap_s_clamp_to_edge, desc->wrap_s_clamp_to_edge); - fprintf(fp, "\t wrap_s_clamp: 0x%x (%d)\n", desc->wrap_s_clamp, desc->wrap_s_clamp); - fprintf(fp, "\t wrap_s_mirror_repeat: 0x%x (%d)\n", desc->wrap_s_mirror_repeat, desc->wrap_s_mirror_repeat); - fprintf(fp, "\t wrap_t_clamp_to_edge: 0x%x (%d)\n", desc->wrap_t_clamp_to_edge, desc->wrap_t_clamp_to_edge); - fprintf(fp, "\t wrap_t_clamp: 0x%x (%d)\n", desc->wrap_t_clamp, desc->wrap_t_clamp); - fprintf(fp, "\t wrap_t_mirror_repeat: 0x%x (%d)\n", desc->wrap_t_mirror_repeat, desc->wrap_t_mirror_repeat); - fprintf(fp, "\t unknown_2_2: 0x%x (%d)\n", desc->unknown_2_2, desc->unknown_2_2); + fprintf(fp, "\t wrap_s: %d (%s)\n", desc->wrap_s, + lima_get_wrap_mode_string(desc->wrap_s)); + fprintf(fp, "\t wrap_t: %d (%s)\n", desc->wrap_t, + lima_get_wrap_mode_string(desc->wrap_t)); + fprintf(fp, "\t wrap_r: %d (%s)\n", desc->wrap_r, + lima_get_wrap_mode_string(desc->wrap_r)); fprintf(fp, "\t width: 0x%x (%d)\n", desc->width, desc->width); fprintf(fp, "\t height: 0x%x (%d)\n", desc->height, desc->height); - fprintf(fp, "\t unknown_3_1: 0x%x (%d)\n", desc->unknown_3_1, desc->unknown_3_1); - fprintf(fp, "\t unknown_3_2: 0x%x (%d)\n", desc->unknown_3_2, desc->unknown_3_2); - - /* Word 4 */ - fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x\n", - start + i * 4, i * 4, *(&data[i + offset])); - i++; - fprintf(fp, "\t unknown_4: 0x%x (%d)\n", desc->unknown_4, desc->unknown_4); - - /* Word 5 */ - fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x\n", - start + i * 4, i * 4, *(&data[i + offset])); - i++; - fprintf(fp, "\t unknown_5: 0x%x (%d)\n", desc->unknown_5, desc->unknown_5); + fprintf(fp, "\t depth: 0x%x (%d)\n", desc->depth, desc->depth); + fprintf(fp, "\t border_red: 0x%x (%d)\n", desc->border_red, desc->border_red); + fprintf(fp, "\t border_green: 0x%x (%d)\n", desc->border_green, desc->border_green); + fprintf(fp, "\t border_blue: 0x%x (%d)\n", desc->border_blue, desc->border_blue); + fprintf(fp, "\t border_alpha: 0x%x (%d)\n", desc->border_alpha, desc->border_alpha); + fprintf(fp, "\t unknown_5_1: 0x%x (%d)\n", desc->unknown_5_1, desc->unknown_5_1); /* Word 6 - */ fprintf(fp, "/* 0x%08x (0x%08x) */", diff --git a/src/gallium/drivers/lima/lima_parser.h b/src/gallium/drivers/lima/lima_parser.h index 2378cfc02db..eed7926fb5e 100644 --- a/src/gallium/drivers/lima/lima_parser.h +++ b/src/gallium/drivers/lima/lima_parser.h @@ -61,7 +61,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "CONST_COLOR", /* 2 */ "ZERO", /* 3 */ "UNKNOWN_4", /* 4 */ - "UNKNOWN_5", /* 5 */ + "SRC2_COLOR", /* 5 */ "UNKNOWN_6", /* 6 */ "SRC_ALPHA_SAT", /* 7 */ "INV_SRC_COLOR", /* 8 */ @@ -69,7 +69,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "INV_CONST_COLOR", /* 10 */ "ONE", /* 11 */ "UNKNOWN_12", /* 12 */ - "UNKNOWN_13", /* 13 */ + "INV_SRC2_COLOR", /* 13 */ "UNKNOWN_14", /* 14 */ "UNKNOWN_15", /* 15 */ "SRC_ALPHA", /* 16 */ @@ -77,13 +77,26 @@ static const char *PIPE_BLENDFACTOR_STRING[] = { "CONST_ALPHA", /* 18 */ "UNKNOWN_19", /* 19 */ "UNKNOWN_20", /* 20 */ - "UNKNOWN_21", /* 21 */ + "SRC2_ALPHA", /* 21 */ "UNKNOWN_22", /* 22 */ "UNKNOWN_23", /* 23 */ "INV_SRC_ALPHA", /* 24 */ "INV_DST_ALPHA", /* 25 */ "INV_CONST_ALPHA", /* 26 */ + "UNKNOWN_27", /* 27 */ + "UNKNOWN_28", /* 28 */ + "INV_SRC2_ALPHA", /* 29 */ +}; +static const char *LIMA_WRAP_MODE_STRING[] = { + "TEX_WRAP_REPEAT", /* 0 */ + "TEX_WRAP_CLAMP_TO_EDGE", /* 1 */ + "TEX_WRAP_CLAMP", /* 2 */ + "TEX_WRAP_CLAMP_TO_BORDER", /* 3 */ + "TEX_WRAP_MIRROR_REPEAT", /* 4 */ + "TEX_WRAP_MIRROR_CLAMP_TO_EDGE", /* 5 */ + "TEX_WRAP_MIRROR_CLAMP", /* 6 */ + "TEX_WRAP_MIRROR_CLAMP_TO_BORDER", /* 7 */ }; static inline const char @@ -118,6 +131,15 @@ static inline const char return "UNKNOWN"; } +static inline const char +*lima_get_wrap_mode_string(int mode) { + if ((mode >= 0) && (mode <= 7)) + return LIMA_WRAP_MODE_STRING[mode]; + else + return "UNKNOWN"; +} + +void lima_parse_shader(FILE *fp, uint32_t *data, int size, bool is_frag); void lima_parse_vs(FILE *fp, uint32_t *data, int size, uint32_t start); void lima_parse_plbu(FILE *fp, uint32_t *data, int size, uint32_t start); void lima_parse_render_state(FILE *fp, uint32_t *data, int size, uint32_t start); diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index a4d9b8a0ef9..0ef3ebbfc98 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -30,6 +30,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir/nir_serialize.h" #include "nir/tgsi_to_nir.h" +#include "nir_legacy.h" #include "pipe/p_state.h" @@ -56,12 +57,14 @@ static const nir_shader_compiler_options vs_nir_options = { /* could be implemented by clamp */ .lower_fsat = true, .lower_bitops = true, - .lower_rotate = true, .lower_sincos = true, .lower_fceil = true, .lower_insert_byte = true, .lower_insert_word = true, - .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), + .force_indirect_unrolling = nir_var_all, + .force_indirect_unrolling_sampler = true, + .lower_varying_from_uniform = true, + .max_unroll_iterations = 32, }; static const nir_shader_compiler_options fs_nir_options = { @@ -74,14 +77,16 @@ static const nir_shader_compiler_options fs_nir_options = { .lower_flrp32 = true, .lower_flrp64 = true, .lower_fsign = true, - .lower_rotate = true, .lower_fdot = true, .lower_fdph = true, .lower_insert_byte = true, .lower_insert_word = true, .lower_bitops = true, .lower_vector_cmp = true, - .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), + .force_indirect_unrolling = (nir_var_shader_out | nir_var_function_temp), + .force_indirect_unrolling_sampler = true, + .lower_varying_from_uniform = true, + .max_unroll_iterations = 32, }; const void * @@ -115,7 +120,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_load_const_to_scalar); NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar); NIR_PASS_V(s, nir_lower_io_to_scalar, - nir_var_shader_in|nir_var_shader_out); + nir_var_shader_in|nir_var_shader_out, NULL, NULL); do { progress = false; @@ -133,19 +138,21 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS(progress, s, lima_nir_lower_ftrunc); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); + NIR_PASS(progress, s, nir_lower_undef_to_zero); NIR_PASS(progress, s, nir_opt_loop_unroll); + NIR_PASS(progress, s, nir_lower_undef_to_zero); } while (progress); NIR_PASS_V(s, nir_lower_int_to_float); /* int_to_float pass generates ftrunc, so lower it */ NIR_PASS(progress, s, lima_nir_lower_ftrunc); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, true); NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, nir_opt_dce); NIR_PASS_V(s, lima_nir_split_loads); - NIR_PASS_V(s, nir_lower_locals_to_regs); NIR_PASS_V(s, nir_convert_from_ssa, true); + NIR_PASS_V(s, nir_opt_dce); NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); nir_sweep(s); } @@ -159,6 +166,10 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) nir_alu_instr *alu = nir_instr_as_alu(instr); switch (alu->op) { case nir_op_frcp: + /* nir_op_idiv is lowered to frcp by lower_int_to_floats which + * will be run later, so lower idiv here + */ + case nir_op_idiv: case nir_op_frsq: case nir_op_flog2: case nir_op_fexp2: @@ -183,7 +194,7 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) return false; } - int num_components = nir_dest_num_components(alu->dest.dest); + int num_components = alu->def.num_components; uint8_t swizzle = alu->src[0].swizzle[0]; @@ -195,7 +206,7 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) } static bool -lima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask, +lima_vec_to_regs_filter_cb(const nir_instr *instr, unsigned writemask, const void *data) { assert(writemask > 0); @@ -214,8 +225,8 @@ lima_program_optimize_fs_nir(struct nir_shader *s, NIR_PASS_V(s, nir_lower_fragcoord_wtrans); NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size, 0); - NIR_PASS_V(s, nir_lower_regs_to_ssa); NIR_PASS_V(s, nir_lower_tex, tex_options); + NIR_PASS_V(s, lima_nir_lower_txp); do { progress = false; @@ -241,7 +252,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s, } while (progress); NIR_PASS_V(s, nir_lower_int_to_float); - NIR_PASS_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_bool_to_float, true); /* Some ops must be lowered after being converted from int ops, * so re-run nir_opt_algebraic after int lowering. */ @@ -253,23 +264,23 @@ lima_program_optimize_fs_nir(struct nir_shader *s, /* Must be run after optimization loop */ NIR_PASS_V(s, lima_nir_scale_trig); - /* Lower modifiers */ - NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods); NIR_PASS_V(s, nir_copy_prop); NIR_PASS_V(s, nir_opt_dce); - NIR_PASS_V(s, nir_lower_locals_to_regs); NIR_PASS_V(s, nir_convert_from_ssa, true); NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); - NIR_PASS_V(s, nir_move_vec_src_uses_to_dest); - NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL); + NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false); + NIR_PASS_V(s, nir_lower_vec_to_regs, lima_vec_to_regs_filter_cb, NULL); + NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */ NIR_PASS_V(s, lima_nir_duplicate_load_uniforms); NIR_PASS_V(s, lima_nir_duplicate_load_inputs); NIR_PASS_V(s, lima_nir_duplicate_load_consts); + NIR_PASS_V(s, nir_legacy_trivialize, true); + nir_sweep(s); } @@ -283,8 +294,8 @@ lima_fs_compile_shader(struct lima_context *ctx, nir_shader *nir = nir_shader_clone(fs, ufs->base.ir.nir); struct nir_lower_tex_options tex_options = { - .lower_txp = ~0u, .swizzle_result = ~0u, + .lower_invalid_implicit_lod = true, }; for (int i = 0; i < ARRAY_SIZE(key->tex); i++) { @@ -297,7 +308,7 @@ lima_fs_compile_shader(struct lima_context *ctx, if (lima_debug & LIMA_DEBUG_PP) nir_print_shader(nir, stdout); - if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) { + if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->base.debug)) { ralloc_free(nir); return false; } @@ -312,15 +323,25 @@ static bool lima_fs_upload_shader(struct lima_context *ctx, struct lima_fs_compiled_shader *fs) { + static const uint32_t pp_clear_program[] = { + PP_CLEAR_PROGRAM + }; + int shader_size = sizeof(pp_clear_program); + void *shader = (void *)pp_clear_program; struct lima_screen *screen = lima_screen(ctx->base.screen); - fs->bo = lima_bo_create(screen, fs->state.shader_size, 0); + if (fs->state.shader_size) { + shader_size = fs->state.shader_size; + shader = fs->shader; + } + + fs->bo = lima_bo_create(screen, shader_size, 0); if (!fs->bo) { fprintf(stderr, "lima: create fs shader bo fail\n"); return false; } - memcpy(lima_bo_map(fs->bo), fs->shader, fs->state.shader_size); + memcpy(lima_bo_map(fs->bo), shader, shader_size); return true; } @@ -473,7 +494,7 @@ lima_vs_compile_shader(struct lima_context *ctx, if (lima_debug & LIMA_DEBUG_GP) nir_print_shader(nir, stdout); - if (!gpir_compile_nir(vs, nir, &ctx->debug)) { + if (!gpir_compile_nir(vs, nir, &ctx->base.debug)) { ralloc_free(nir); return false; } @@ -591,15 +612,19 @@ lima_update_fs_state(struct lima_context *ctx) memcpy(key->nir_sha1, ctx->uncomp_fs->nir_sha1, sizeof(ctx->uncomp_fs->nir_sha1)); + uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }; for (int i = 0; i < lima_tex->num_textures; i++) { struct lima_sampler_view *sampler = lima_sampler_view(lima_tex->textures[i]); + if (!sampler) { + memcpy(key->tex[i].swizzle, identity, 4); + continue; + } for (int j = 0; j < 4; j++) key->tex[i].swizzle[j] = sampler->swizzle[j]; } /* Fill rest with identity swizzle */ - uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, - PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }; for (int i = lima_tex->num_textures; i < ARRAY_SIZE(key->tex); i++) memcpy(key->tex[i].swizzle, identity, 4); diff --git a/src/gallium/drivers/lima/lima_resource.c b/src/gallium/drivers/lima/lima_resource.c index 946edc86df8..1e263e95e57 100644 --- a/src/gallium/drivers/lima/lima_resource.c +++ b/src/gallium/drivers/lima/lima_resource.c @@ -28,8 +28,10 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_debug.h" +#include "util/u_resource.h" #include "util/u_transfer.h" #include "util/u_surface.h" +#include "util/u_transfer_helper.h" #include "util/hash_table.h" #include "util/ralloc.h" #include "util/u_drm.h" @@ -45,6 +47,7 @@ #include "lima_resource.h" #include "lima_bo.h" #include "lima_util.h" +#include "lima_blit.h" #include "pan_minmax_cache.h" #include "pan_tiling.h" @@ -57,7 +60,10 @@ lima_resource_create_scanout(struct pipe_screen *pscreen, struct lima_screen *screen = lima_screen(pscreen); struct renderonly_scanout *scanout; struct winsys_handle handle; - struct pipe_resource *pres; + + struct lima_resource *res = CALLOC_STRUCT(lima_resource); + if (!res) + return NULL; struct pipe_resource scanout_templat = *templat; scanout_templat.width0 = width; @@ -69,32 +75,44 @@ lima_resource_create_scanout(struct pipe_screen *pscreen, if (!scanout) return NULL; + res->base = *templat; + res->base.screen = pscreen; + pipe_reference_init(&res->base.reference, 1); + res->levels[0].offset = handle.offset; + res->levels[0].stride = handle.stride; + assert(handle.type == WINSYS_HANDLE_TYPE_FD); - pres = pscreen->resource_from_handle(pscreen, templat, &handle, - PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE); + res->bo = lima_bo_import(screen, &handle); + if (!res->bo) { + FREE(res); + return NULL; + } + + res->modifier_constant = true; close(handle.handle); - if (!pres) { + if (!res->bo) { renderonly_scanout_destroy(scanout, screen->ro); + FREE(res); return NULL; } - struct lima_resource *res = lima_resource(pres); res->scanout = scanout; - return pres; + return &res->base; } static uint32_t setup_miptree(struct lima_resource *res, unsigned width0, unsigned height0, - bool should_align_dimensions) + bool align_to_tile) { struct pipe_resource *pres = &res->base; unsigned level; unsigned width = width0; unsigned height = height0; unsigned depth = pres->depth0; + unsigned nr_samples = MAX2(pres->nr_samples, 1); uint32_t size = 0; for (level = 0; level <= pres->last_level; level++) { @@ -103,7 +121,7 @@ setup_miptree(struct lima_resource *res, unsigned aligned_width; unsigned aligned_height; - if (should_align_dimensions) { + if (align_to_tile) { aligned_width = align(width, 16); aligned_height = align(height, 16); } else { @@ -116,7 +134,6 @@ setup_miptree(struct lima_resource *res, util_format_get_nblocksy(pres->format, aligned_height) * pres->array_size * depth; - res->levels[level].width = aligned_width; res->levels[level].stride = stride; res->levels[level].offset = size; res->levels[level].layer_stride = util_format_get_stride(pres->format, align(width, 16)) * align(height, 16); @@ -124,19 +141,18 @@ setup_miptree(struct lima_resource *res, if (util_format_is_compressed(pres->format)) res->levels[level].layer_stride /= 4; - /* The start address of each level except the last level - * must be 64-aligned in order to be able to pass the - * addresses to the hardware. */ - if (level != pres->last_level) - size += align(actual_level_size, 64); - else - size += actual_level_size; /* Save some memory */ + size += align(actual_level_size, 64); width = u_minify(width, 1); height = u_minify(height, 1); depth = u_minify(depth, 1); } + if (nr_samples > 1) + res->mrt_pitch = size; + + size *= nr_samples; + return size; } @@ -144,7 +160,7 @@ static struct pipe_resource * lima_resource_create_bo(struct pipe_screen *pscreen, const struct pipe_resource *templat, unsigned width, unsigned height, - bool should_align_dimensions) + bool align_to_tile) { struct lima_screen *screen = lima_screen(pscreen); struct lima_resource *res; @@ -160,7 +176,7 @@ lima_resource_create_bo(struct pipe_screen *pscreen, pres = &res->base; - uint32_t size = setup_miptree(res, width, height, should_align_dimensions); + uint32_t size = setup_miptree(res, width, height, align_to_tile); size = align(size, LIMA_PAGE_SIZE); res->bo = lima_bo_create(screen, size, 0); @@ -181,8 +197,8 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen, struct lima_screen *screen = lima_screen(pscreen); bool should_tile = lima_debug & LIMA_DEBUG_NO_TILING ? false : true; unsigned width, height; - bool should_align_dimensions; bool has_user_modifiers = true; + bool align_to_tile = false; if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID) has_user_modifiers = false; @@ -203,24 +219,25 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen, modifiers, count)) should_tile = false; - if (should_tile || (templat->bind & PIPE_BIND_RENDER_TARGET) || - (templat->bind & PIPE_BIND_DEPTH_STENCIL)) { - should_align_dimensions = true; - width = align(templat->width0, 16); - height = align(templat->height0, 16); - } - else { - should_align_dimensions = false; - width = templat->width0; - height = templat->height0; + width = templat->width0; + height = templat->height0; + + /* Don't align index, vertex or constant buffers */ + if (!(templat->bind & (PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_CONSTANT_BUFFER))) { + if (templat->bind & PIPE_BIND_SHARED) { + width = align(width, 16); + height = align(height, 16); + } + align_to_tile = true; } struct pipe_resource *pres; if (screen->ro && (templat->bind & PIPE_BIND_SCANOUT)) pres = lima_resource_create_scanout(pscreen, templat, width, height); else - pres = lima_resource_create_bo(pscreen, templat, width, height, - should_align_dimensions); + pres = lima_resource_create_bo(pscreen, templat, width, height, align_to_tile); if (pres) { struct lima_resource *res = lima_resource(pres); @@ -346,12 +363,11 @@ lima_resource_from_handle(struct pipe_screen *pscreen, /* check alignment for the buffer */ if (res->tiled || (pres->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL))) { - unsigned width, height, stride, size; + unsigned width, stride, size; width = align(pres->width0, 16); - height = align(pres->height0, 16); stride = util_format_get_stride(pres->format, width); - size = util_format_get_2d_size(pres->format, stride, height); + size = util_format_get_2d_size(pres->format, stride, pres->height0); if (res->tiled && res->levels[0].stride != stride) { fprintf(stderr, "tiled imported buffer has mismatching stride: %d (BO) != %d (expected)", @@ -375,11 +391,7 @@ lima_resource_from_handle(struct pipe_screen *pscreen, (res->bo->size - res->levels[0].offset), size); goto err_out; } - - res->levels[0].width = width; } - else - res->levels[0].width = pres->width0; if (screen->ro) { /* Make sure that renderonly has a handle to our buffer in the @@ -435,7 +447,8 @@ lima_resource_get_param(struct pipe_screen *pscreen, enum pipe_resource_param param, unsigned usage, uint64_t *value) { - struct lima_resource *res = lima_resource(pres); + struct lima_resource *res = + (struct lima_resource *)util_resource_at_index(pres, plane); switch (param) { case PIPE_RESOURCE_PARAM_STRIDE: @@ -449,7 +462,9 @@ lima_resource_get_param(struct pipe_screen *pscreen, *value = DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; else *value = DRM_FORMAT_MOD_LINEAR; - + return true; + case PIPE_RESOURCE_PARAM_NPLANES: + *value = util_resource_num(pres); return true; default: return false; @@ -543,18 +558,6 @@ lima_resource_set_damage_region(struct pipe_screen *pscreen, damage->num_region = nrects; } -void -lima_resource_screen_init(struct lima_screen *screen) -{ - screen->base.resource_create = lima_resource_create; - screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers; - screen->base.resource_from_handle = lima_resource_from_handle; - screen->base.resource_destroy = lima_resource_destroy; - screen->base.resource_get_handle = lima_resource_get_handle; - screen->base.resource_get_param = lima_resource_get_param; - screen->base.set_damage_region = lima_resource_set_damage_region; -} - static struct pipe_surface * lima_surface_create(struct pipe_context *pctx, struct pipe_resource *pres, @@ -577,6 +580,7 @@ lima_surface_create(struct pipe_context *pctx, psurf->format = surf_tmpl->format; psurf->width = u_minify(pres->width0, level); psurf->height = u_minify(pres->height0, level); + psurf->nr_samples = surf_tmpl->nr_samples; psurf->u.tex.level = level; psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; @@ -651,17 +655,16 @@ lima_transfer_map(struct pipe_context *pctx, unsigned op = usage & PIPE_MAP_WRITE ? LIMA_GEM_WAIT_WRITE : LIMA_GEM_WAIT_READ; - lima_bo_wait(bo, op, PIPE_TIMEOUT_INFINITE); + lima_bo_wait(bo, op, OS_TIMEOUT_INFINITE); } if (!lima_bo_map(bo)) return NULL; - trans = slab_alloc(&ctx->transfer_pool); + trans = slab_zalloc(&ctx->transfer_pool); if (!trans) return NULL; - memset(trans, 0, sizeof(*trans)); ptrans = &trans->base; pipe_resource_reference(&ptrans->resource, pres); @@ -678,6 +681,10 @@ lima_transfer_map(struct pipe_context *pctx, trans->staging = malloc(ptrans->stride * ptrans->box.height * ptrans->box.depth); if (usage & PIPE_MAP_READ) { + unsigned line_stride = res->levels[level].stride; + unsigned row_height = util_format_is_compressed(pres->format) ? 4 : 16; + unsigned row_stride = line_stride * row_height; + unsigned i; for (i = 0; i < ptrans->box.depth; i++) panfrost_load_tiled_image( @@ -686,7 +693,7 @@ lima_transfer_map(struct pipe_context *pctx, ptrans->box.x, ptrans->box.y, ptrans->box.width, ptrans->box.height, ptrans->stride, - res->levels[level].stride, + row_stride, pres->format); } @@ -701,7 +708,7 @@ lima_transfer_map(struct pipe_context *pctx, ptrans->layer_stride = res->levels[level].layer_stride; if ((usage & PIPE_MAP_WRITE) && (usage & PIPE_MAP_DIRECTLY)) - panfrost_minmax_cache_invalidate(res->index_cache, ptrans); + panfrost_minmax_cache_invalidate(res->index_cache, ptrans->box.x, ptrans->box.width); return bo->map + res->levels[level].offset + box->z * res->levels[level].layer_stride + @@ -711,14 +718,6 @@ lima_transfer_map(struct pipe_context *pctx, } } -static void -lima_transfer_flush_region(struct pipe_context *pctx, - struct pipe_transfer *ptrans, - const struct pipe_box *box) -{ - -} - static bool lima_should_convert_linear(struct lima_resource *res, struct pipe_transfer *ptrans) @@ -752,9 +751,11 @@ lima_should_convert_linear(struct lima_resource *res, } static void -lima_transfer_unmap_inner(struct lima_context *ctx, - struct pipe_transfer *ptrans) +lima_transfer_flush_region(struct pipe_context *pctx, + struct pipe_transfer *ptrans, + const struct pipe_box *box) { + struct lima_context *ctx = lima_context(pctx); struct lima_resource *res = lima_resource(ptrans->resource); struct lima_transfer *trans = lima_transfer(ptrans); struct lima_bo *bo = res->bo; @@ -784,13 +785,17 @@ lima_transfer_unmap_inner(struct lima_context *ctx, /* Update texture descriptor */ ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES; } else { + unsigned line_stride = res->levels[ptrans->level].stride; + unsigned row_height = util_format_is_compressed(pres->format) ? 4 : 16; + unsigned row_stride = line_stride * row_height; + for (i = 0; i < trans->base.box.depth; i++) panfrost_store_tiled_image( bo->map + res->levels[trans->base.level].offset + (i + trans->base.box.z) * res->levels[trans->base.level].layer_stride, trans->staging + i * ptrans->stride * ptrans->box.height, ptrans->box.x, ptrans->box.y, ptrans->box.width, ptrans->box.height, - res->levels[ptrans->level].stride, + row_stride, ptrans->stride, pres->format); } @@ -806,10 +811,14 @@ lima_transfer_unmap(struct pipe_context *pctx, struct lima_transfer *trans = lima_transfer(ptrans); struct lima_resource *res = lima_resource(ptrans->resource); - lima_transfer_unmap_inner(ctx, ptrans); + struct pipe_box box; + u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box); + lima_transfer_flush_region(pctx, ptrans, &box); if (trans->staging) free(trans->staging); - panfrost_minmax_cache_invalidate(res->index_cache, ptrans); + if (ptrans->usage & PIPE_MAP_WRITE) { + panfrost_minmax_cache_invalidate(res->index_cache, ptrans->box.x, ptrans->box.width); + } pipe_resource_reference(&ptrans->resource, NULL); slab_free(&ctx->transfer_pool, trans); @@ -829,8 +838,8 @@ lima_util_blitter_save_states(struct lima_context *ctx) util_blitter_save_scissor(ctx->blitter, &ctx->scissor); util_blitter_save_vertex_elements(ctx->blitter, ctx->vertex_elements); - util_blitter_save_vertex_buffer_slot(ctx->blitter, - ctx->vertex_buffers.vb); + util_blitter_save_vertex_buffers(ctx->blitter, + ctx->vertex_buffers.vb, ctx->vertex_buffers.count); util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer.base); @@ -848,7 +857,11 @@ lima_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) struct lima_context *ctx = lima_context(pctx); struct pipe_blit_info info = *blit_info; - if (util_try_blit_via_copy_region(pctx, &info)) { + if (lima_do_blit(pctx, blit_info)) { + return; + } + + if (util_try_blit_via_copy_region(pctx, &info, false)) { return; /* done */ } @@ -883,7 +896,7 @@ lima_texture_subdata(struct pipe_context *pctx, const struct pipe_box *box, const void *data, unsigned stride, - unsigned layer_stride) + uintptr_t layer_stride) { struct lima_context *ctx = lima_context(pctx); struct lima_resource *res = lima_resource(prsc); @@ -909,11 +922,41 @@ lima_texture_subdata(struct pipe_context *pctx, }; lima_flush_job_accessing_bo(ctx, res->bo, true); - lima_bo_wait(res->bo, LIMA_GEM_WAIT_WRITE, PIPE_TIMEOUT_INFINITE); + lima_bo_wait(res->bo, LIMA_GEM_WAIT_WRITE, OS_TIMEOUT_INFINITE); if (!lima_bo_map(res->bo)) return; - lima_transfer_unmap_inner(ctx, &t.base); + struct pipe_box tbox; + u_box_2d(0, 0, t.base.box.width, t.base.box.height, &tbox); + lima_transfer_flush_region(pctx, &t.base, &tbox); +} + +static const struct u_transfer_vtbl transfer_vtbl = { + .resource_create = lima_resource_create, + .resource_destroy = lima_resource_destroy, + .transfer_map = lima_transfer_map, + .transfer_unmap = lima_transfer_unmap, + .transfer_flush_region = lima_transfer_flush_region, +}; + +void +lima_resource_screen_init(struct lima_screen *screen) +{ + screen->base.resource_create = lima_resource_create; + screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers; + screen->base.resource_from_handle = lima_resource_from_handle; + screen->base.resource_destroy = lima_resource_destroy; + screen->base.resource_get_handle = lima_resource_get_handle; + screen->base.resource_get_param = lima_resource_get_param; + screen->base.set_damage_region = lima_resource_set_damage_region; + screen->base.transfer_helper = u_transfer_helper_create(&transfer_vtbl, + U_TRANSFER_HELPER_MSAA_MAP); +} + +void +lima_resource_screen_destroy(struct lima_screen *screen) +{ + u_transfer_helper_destroy(screen->base.transfer_helper); } void @@ -932,11 +975,11 @@ lima_resource_context_init(struct lima_context *ctx) ctx->base.blit = lima_blit; - ctx->base.buffer_map = lima_transfer_map; - ctx->base.texture_map = lima_transfer_map; - ctx->base.transfer_flush_region = lima_transfer_flush_region; - ctx->base.buffer_unmap = lima_transfer_unmap; - ctx->base.texture_unmap = lima_transfer_unmap; + ctx->base.buffer_map = u_transfer_helper_transfer_map; + ctx->base.texture_map = u_transfer_helper_transfer_map; + ctx->base.transfer_flush_region = u_transfer_helper_transfer_flush_region; + ctx->base.buffer_unmap = u_transfer_helper_transfer_unmap; + ctx->base.texture_unmap = u_transfer_helper_transfer_unmap; ctx->base.flush_resource = lima_flush_resource; } diff --git a/src/gallium/drivers/lima/lima_resource.h b/src/gallium/drivers/lima/lima_resource.h index 91443e540b5..300a606f55c 100644 --- a/src/gallium/drivers/lima/lima_resource.h +++ b/src/gallium/drivers/lima/lima_resource.h @@ -35,7 +35,6 @@ struct lima_screen; struct panfrost_minmax_cache; struct lima_resource_level { - uint32_t width; uint32_t stride; uint32_t offset; uint32_t layer_stride; @@ -55,6 +54,7 @@ struct lima_resource { struct renderonly_scanout *scanout; struct lima_bo *bo; struct panfrost_minmax_cache *index_cache; + uint32_t mrt_pitch; bool tiled; bool modifier_constant; unsigned full_updates; @@ -95,6 +95,9 @@ void lima_resource_screen_init(struct lima_screen *screen); void +lima_resource_screen_destroy(struct lima_screen *screen); + +void lima_resource_context_init(struct lima_context *ctx); #endif diff --git a/src/gallium/drivers/lima/lima_screen.c b/src/gallium/drivers/lima/lima_screen.c index 9c52f30de7d..2e779693d0e 100644 --- a/src/gallium/drivers/lima/lima_screen.c +++ b/src/gallium/drivers/lima/lima_screen.c @@ -63,6 +63,7 @@ lima_screen_destroy(struct pipe_screen *pscreen) lima_bo_cache_fini(screen); lima_bo_table_fini(screen); disk_cache_destroy(screen->disk_cache); + lima_resource_screen_destroy(screen); ralloc_free(screen); } @@ -84,7 +85,7 @@ lima_screen_get_name(struct pipe_screen *pscreen) static const char * lima_screen_get_vendor(struct pipe_screen *pscreen) { - return "lima"; + return "Mesa"; } static const char * @@ -101,27 +102,25 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_ACCELERATED: case PIPE_CAP_UMA: + case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: - return 1; - - /* Unimplemented, but for exporting OpenGL 2.0 */ - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: return 1; /* not clear supported */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; - case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: - case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL: - case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_FS_POINT_IS_SYSVAL: + case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL: return 1; case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: @@ -145,7 +144,7 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_FUNCTION: return 0; - case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_TEXTURE_TRANSFER_MODES: case PIPE_CAP_SHAREABLE_SHADERS: return 0; @@ -160,6 +159,16 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: return 1; + /* Mali4x0 PP doesn't have a swizzle for load_input, so use POT-aligned + * varyings to avoid unnecessary movs for vec3 and precision downgrade + * in case if this vec3 is coordinates for a sampler + */ + case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS: + return 1; + + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + default: return u_pipe_screen_get_param_defaults(pscreen, param); } @@ -169,10 +178,18 @@ static float lima_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) { switch (param) { + case PIPE_CAPF_MIN_LINE_WIDTH: + case PIPE_CAPF_MIN_LINE_WIDTH_AA: + case PIPE_CAPF_MIN_POINT_SIZE: + case PIPE_CAPF_MIN_POINT_SIZE_AA: + return 1; + case PIPE_CAPF_POINT_SIZE_GRANULARITY: + case PIPE_CAPF_LINE_WIDTH_GRANULARITY: + return 0.1; case PIPE_CAPF_MAX_LINE_WIDTH: case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_SIZE: + case PIPE_CAPF_MAX_POINT_SIZE_AA: return 100.0f; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: return 16.0f; @@ -206,21 +223,15 @@ get_vertex_shader_param(struct lima_screen *screen, /* Mali-400 GP provides space for 304 vec4 uniforms, globals and * temporary variables. */ - case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: return 304 * 4 * sizeof(float); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; - case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* need investigate */ - case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: - return 32; - default: return 0; } @@ -247,7 +258,7 @@ get_fragment_shader_param(struct lima_screen *screen, * However, indirect access to an uniform only supports indices up * to 8192 (a 2048 vec4 array). To prevent indices bigger than that, * limit max const buffer size to 8192 for now. */ - case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE: return 2048 * 4 * sizeof(float); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: @@ -257,9 +268,6 @@ get_fragment_shader_param(struct lima_screen *screen, case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* need investigate */ - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_NIR; - case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* need investigate */ @@ -271,9 +279,6 @@ get_fragment_shader_param(struct lima_screen *screen, case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: return 0; - case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: - return 32; - default: return 0; } @@ -309,6 +314,7 @@ lima_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_BUFFER: case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_3D: case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: break; @@ -319,7 +325,7 @@ lima_screen_is_format_supported(struct pipe_screen *pscreen, if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; - /* be able to support 16, now limit to 4 */ + /* Utgard supports 16x, but for now limit it to 4x */ if (sample_count > 1 && sample_count != 4) return false; @@ -583,7 +589,9 @@ static const struct debug_named_value lima_debug_options[] = { "Precompile shaders for shader-db" }, { "diskcache", LIMA_DEBUG_DISK_CACHE, "print debug info for shader disk cache" }, - { NULL } + { "noblit", LIMA_DEBUG_NO_BLIT, + "use generic u_blitter instead of lima-specific" }, + DEBUG_NAMED_VALUE_END }; DEBUG_GET_ONCE_FLAGS_OPTION(lima_debug, "LIMA_DEBUG", lima_debug_options, 0) @@ -633,8 +641,16 @@ lima_get_disk_shader_cache (struct pipe_screen *pscreen) return screen->disk_cache; } +static int +lima_screen_get_fd(struct pipe_screen *pscreen) +{ + struct lima_screen *screen = lima_screen(pscreen); + return screen->fd; +} + struct pipe_screen * -lima_screen_create(int fd, struct renderonly *ro) +lima_screen_create(int fd, const struct pipe_screen_config *config, + struct renderonly *ro) { uint64_t system_memory; struct lima_screen *screen; @@ -676,17 +692,15 @@ lima_screen_create(int fd, struct renderonly *ro) screen->pp_buffer->cacheable = false; /* fs program for clear buffer? - * const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop */ static const uint32_t pp_clear_program[] = { - 0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000, - 0x000005f5, 0x00000000, 0x00000000, 0x00000000, + PP_CLEAR_PROGRAM }; memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_program_offset, pp_clear_program, sizeof(pp_clear_program)); /* copy texture to framebuffer, used to reload gpu tile buffer - * load.v $1 0.xy, texld_2d 0, mov.v0 $0 ^tex_sampler, sync, stop + * load.v $1 0.xy, texld 0, mov.v0 $0 ^tex_sampler, sync, stop */ static const uint32_t pp_reload_program[] = { 0x000005e6, 0xf1003c20, 0x00000000, 0x39001000, @@ -717,6 +731,7 @@ lima_screen_create(int fd, struct renderonly *ro) pp_frame_rsw[13] = 0x00000100; screen->base.destroy = lima_screen_destroy; + screen->base.get_screen_fd = lima_screen_get_fd; screen->base.get_name = lima_screen_get_name; screen->base.get_vendor = lima_screen_get_vendor; screen->base.get_device_vendor = lima_screen_get_device_vendor; @@ -736,8 +751,6 @@ lima_screen_create(int fd, struct renderonly *ro) slab_create_parent(&screen->transfer_pool, sizeof(struct lima_transfer), 16); - screen->refcnt = 1; - return &screen->base; err_out2: diff --git a/src/gallium/drivers/lima/lima_screen.h b/src/gallium/drivers/lima/lima_screen.h index bc08a490236..5f76edd551a 100644 --- a/src/gallium/drivers/lima/lima_screen.h +++ b/src/gallium/drivers/lima/lima_screen.h @@ -30,7 +30,7 @@ #include "util/slab.h" #include "util/list.h" #include "util/disk_cache.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "pipe/p_screen.h" @@ -45,6 +45,7 @@ #define LIMA_DEBUG_SINGLE_JOB (1 << 8) #define LIMA_DEBUG_PRECOMPILE (1 << 9) #define LIMA_DEBUG_DISK_CACHE (1 << 10) +#define LIMA_DEBUG_NO_BLIT (1 << 11) extern uint32_t lima_debug; extern int lima_ctx_num_plb; @@ -59,13 +60,15 @@ struct ra_regs; #define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1) +/* const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop */ +#define PP_CLEAR_PROGRAM \ + 0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000, \ + 0x000005f5, 0x00000000, 0x00000000, 0x00000000, \ + struct lima_screen { struct pipe_screen base; struct renderonly *ro; - int refcnt; - void *winsys_priv; - int fd; int gpu_type; int num_pp; @@ -103,6 +106,7 @@ lima_screen(struct pipe_screen *pscreen) } struct pipe_screen * -lima_screen_create(int fd, struct renderonly *ro); +lima_screen_create(int fd, const struct pipe_screen_config *config, + struct renderonly *ro); #endif diff --git a/src/gallium/drivers/lima/lima_state.c b/src/gallium/drivers/lima/lima_state.c index eafe772554e..198c2982534 100644 --- a/src/gallium/drivers/lima/lima_state.c +++ b/src/gallium/drivers/lima/lima_state.c @@ -29,6 +29,7 @@ #include "util/u_helpers.h" #include "util/u_debug.h" #include "util/u_framebuffer.h" +#include "util/u_viewport.h" #include "pipe/p_state.h" @@ -185,18 +186,14 @@ lima_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) static void lima_set_vertex_buffers(struct pipe_context *pctx, - unsigned start_slot, unsigned count, - unsigned unbind_num_trailing_slots, - bool take_ownership, + unsigned count, const struct pipe_vertex_buffer *vb) { struct lima_context *ctx = lima_context(pctx); struct lima_context_vertex_buffer *so = &ctx->vertex_buffers; util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, - vb, start_slot, count, - unbind_num_trailing_slots, - take_ownership); + vb, count, true); so->count = util_last_bit(so->enabled_mask); ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_BUFF; @@ -211,18 +208,22 @@ lima_set_viewport_states(struct pipe_context *pctx, struct lima_context *ctx = lima_context(pctx); /* reverse calculate the parameter of glViewport */ - ctx->viewport.left = viewport->translate[0] - fabsf(viewport->scale[0]); - ctx->viewport.right = viewport->translate[0] + fabsf(viewport->scale[0]); - ctx->viewport.bottom = viewport->translate[1] - fabsf(viewport->scale[1]); - ctx->viewport.top = viewport->translate[1] + fabsf(viewport->scale[1]); + ctx->viewport.left = ctx->ext_viewport.left = + viewport->translate[0] - fabsf(viewport->scale[0]); + ctx->viewport.right = ctx->ext_viewport.right = + viewport->translate[0] + fabsf(viewport->scale[0]); + ctx->viewport.bottom = ctx->ext_viewport.bottom = + viewport->translate[1] - fabsf(viewport->scale[1]); + ctx->viewport.top = ctx->ext_viewport.top = + viewport->translate[1] + fabsf(viewport->scale[1]); /* reverse calculate the parameter of glDepthRange */ float near, far; - near = viewport->translate[2] - viewport->scale[2]; - far = viewport->translate[2] + viewport->scale[2]; + bool halfz = ctx->rasterizer && ctx->rasterizer->base.clip_halfz; + util_viewport_zmin_zmax(viewport, halfz, &near, &far); - ctx->viewport.near = MIN2(near, far); - ctx->viewport.far = MAX2(near, far); + ctx->viewport.near = ctx->rasterizer && ctx->rasterizer->base.depth_clip_near ? near : 0.0f; + ctx->viewport.far = ctx->rasterizer && ctx->rasterizer->base.depth_clip_far ? far : 1.0f; ctx->viewport.transform = *viewport; ctx->dirty |= LIMA_CONTEXT_DIRTY_VIEWPORT; @@ -415,6 +416,9 @@ static void lima_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) { + struct lima_context *ctx = lima_context(pctx); + ctx->sample_mask = sample_mask & ((1 << LIMA_MAX_SAMPLES) - 1); + ctx->dirty |= LIMA_CONTEXT_DIRTY_SAMPLE_MASK; } void @@ -464,8 +468,5 @@ lima_state_fini(struct lima_context *ctx) struct lima_context_vertex_buffer *so = &ctx->vertex_buffers; util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, NULL, - 0, 0, ARRAY_SIZE(so->vb), false); - - pipe_surface_reference(&ctx->framebuffer.base.cbufs[0], NULL); - pipe_surface_reference(&ctx->framebuffer.base.zsbuf, NULL); + 0, false); } diff --git a/src/gallium/drivers/lima/lima_texture.c b/src/gallium/drivers/lima/lima_texture.c index 7079865a3b5..90413eb22ec 100644 --- a/src/gallium/drivers/lima/lima_texture.c +++ b/src/gallium/drivers/lima/lima_texture.c @@ -23,6 +23,7 @@ * */ +#include "util/compiler.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" @@ -70,23 +71,26 @@ lima_texture_desc_set_va(lima_tex_desc *desc, void lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc, struct pipe_resource *prsc, - unsigned first_level, unsigned last_level, unsigned first_layer) + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned mrt_idx) { - unsigned width, height, layout, i; + unsigned width, height, depth, layout, i; struct lima_resource *lima_res = lima_resource(prsc); width = prsc->width0; height = prsc->height0; + depth = prsc->depth0; if (first_level != 0) { width = u_minify(width, first_level); height = u_minify(height, first_level); + depth = u_minify(depth, first_level); } desc->format = lima_format_get_texel(prsc->format); desc->swap_r_b = lima_format_get_texel_swap_rb(prsc->format); desc->width = width; desc->height = height; - desc->unknown_3_1 = 1; + desc->depth = depth; if (lima_res->tiled) layout = 3; @@ -99,7 +103,9 @@ lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc, uint32_t base_va = lima_res->bo->va; /* attach first level */ - uint32_t first_va = base_va + lima_res->levels[first_level].offset + first_layer * lima_res->levels[first_level].layer_stride; + uint32_t first_va = base_va + lima_res->levels[first_level].offset + + first_layer * lima_res->levels[first_level].layer_stride + + mrt_idx * lima_res->mrt_pitch; desc->va_s.va_0 = first_va >> 6; desc->va_s.layout = layout; @@ -112,6 +118,37 @@ lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc, } } +static unsigned +pipe_wrap_to_lima(unsigned pipe_wrap, bool using_nearest) +{ + switch (pipe_wrap) { + case PIPE_TEX_WRAP_REPEAT: + return LIMA_TEX_WRAP_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return LIMA_TEX_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP: + if (using_nearest) + return LIMA_TEX_WRAP_CLAMP_TO_EDGE; + else + return LIMA_TEX_WRAP_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return LIMA_TEX_WRAP_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return LIMA_TEX_WRAP_MIRROR_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + if (using_nearest) + return LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + else + return LIMA_TEX_WRAP_MIRROR_CLAMP; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return LIMA_TEX_WRAP_MIRROR_CLAMP_TO_BORDER; + default: + return LIMA_TEX_WRAP_REPEAT; + } +} + static void lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sampler, struct lima_sampler_view *texture, void *pdesc, @@ -127,19 +164,28 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample memset(desc, 0, desc_size); + if (!texture) + return; + switch (texture->base.target) { + case PIPE_TEXTURE_1D: + desc->sampler_dim = LIMA_SAMPLER_DIM_1D; + break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: - desc->texture_type = LIMA_TEXTURE_TYPE_2D; + desc->sampler_dim = LIMA_SAMPLER_DIM_2D; break; case PIPE_TEXTURE_CUBE: - desc->texture_type = LIMA_TEXTURE_TYPE_CUBE; + desc->cube_map = 1; + FALLTHROUGH; + case PIPE_TEXTURE_3D: + desc->sampler_dim = LIMA_SAMPLER_DIM_3D; break; default: break; } - if (!sampler->base.normalized_coords) + if (sampler->base.unnormalized_coords) desc->unnorm_coords = 1; first_level = texture->base.u.tex.first_level; @@ -190,39 +236,19 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample break; } - /* Only clamp, clamp to edge, repeat and mirror repeat are supported */ - switch (sampler->base.wrap_s) { - case PIPE_TEX_WRAP_CLAMP: - desc->wrap_s_clamp = 1; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - desc->wrap_s_clamp_to_edge = 1; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - desc->wrap_s_mirror_repeat = 1; - break; - case PIPE_TEX_WRAP_REPEAT: - default: - break; - } + /* Panfrost mentions that GL_CLAMP is broken for NEAREST filter on Midgard, + * looks like it also broken on Utgard, since it fails in piglit + */ + bool using_nearest = sampler->base.min_img_filter == PIPE_TEX_FILTER_NEAREST; - /* Only clamp, clamp to edge, repeat and mirror repeat are supported */ - switch (sampler->base.wrap_t) { - case PIPE_TEX_WRAP_CLAMP: - desc->wrap_t_clamp = 1; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - desc->wrap_t_clamp_to_edge = 1; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - desc->wrap_t_mirror_repeat = 1; - break; - case PIPE_TEX_WRAP_REPEAT: - default: - break; - } + desc->wrap_s = pipe_wrap_to_lima(sampler->base.wrap_s, using_nearest); + desc->wrap_t = pipe_wrap_to_lima(sampler->base.wrap_t, using_nearest); + desc->wrap_r = pipe_wrap_to_lima(sampler->base.wrap_r, using_nearest); + + desc->border_red = float_to_ushort(sampler->base.border_color.f[0]); + desc->border_green = float_to_ushort(sampler->base.border_color.f[1]); + desc->border_blue = float_to_ushort(sampler->base.border_color.f[2]); + desc->border_alpha = float_to_ushort(sampler->base.border_color.f[3]); if (desc->min_img_filter_nearest && desc->mag_img_filter_nearest && desc->min_mipfilter_2 == 0 && @@ -232,7 +258,7 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample desc->lod_bias += lod_bias_delta; lima_texture_desc_set_res(ctx, desc, texture->base.texture, - first_level, last_level, first_layer); + first_level, last_level, first_layer, 0); } static unsigned @@ -240,6 +266,10 @@ lima_calc_tex_desc_size(struct lima_sampler_view *texture) { unsigned size = offsetof(lima_tex_desc, va); unsigned va_bit_size; + + if (!texture) + return lima_min_tex_desc_size; + unsigned first_level = texture->base.u.tex.first_level; unsigned last_level = texture->base.u.tex.last_level; @@ -268,6 +298,8 @@ lima_update_textures(struct lima_context *ctx) /* we always need to add texture bo to job */ for (int i = 0; i < lima_tex->num_samplers; i++) { struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]); + if (!texture) + continue; struct lima_resource *rsc = lima_resource(texture->base.texture); lima_flush_previous_job_writing_resource(ctx, texture->base.texture); lima_job_add_bo(job, LIMA_PIPE_PP, rsc->bo, LIMA_SUBMIT_BO_READ); diff --git a/src/gallium/drivers/lima/lima_texture.h b/src/gallium/drivers/lima/lima_texture.h index 08a961ba4ae..18cc9c7050a 100644 --- a/src/gallium/drivers/lima/lima_texture.h +++ b/src/gallium/drivers/lima/lima_texture.h @@ -27,8 +27,18 @@ #define lima_min_tex_desc_size 64 -#define LIMA_TEXTURE_TYPE_2D 2 -#define LIMA_TEXTURE_TYPE_CUBE 5 +#define LIMA_SAMPLER_DIM_1D 0 +#define LIMA_SAMPLER_DIM_2D 1 +#define LIMA_SAMPLER_DIM_3D 2 + +#define LIMA_TEX_WRAP_REPEAT 0 +#define LIMA_TEX_WRAP_CLAMP_TO_EDGE 1 +#define LIMA_TEX_WRAP_CLAMP 2 +#define LIMA_TEX_WRAP_CLAMP_TO_BORDER 3 +#define LIMA_TEX_WRAP_MIRROR_REPEAT 4 +#define LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE 5 +#define LIMA_TEX_WRAP_MIRROR_CLAMP 6 +#define LIMA_TEX_WRAP_MIRROR_CLAMP_TO_BORDER 7 typedef struct __attribute__((__packed__)) { /* Word 0 */ @@ -43,7 +53,8 @@ typedef struct __attribute__((__packed__)) { uint32_t unknown_1_1: 7; uint32_t unnorm_coords: 1; uint32_t unknown_1_2: 1; - uint32_t texture_type: 3; + uint32_t cube_map: 1; + uint32_t sampler_dim: 2; uint32_t min_lod: 8; /* Fixed point, 4.4, unsigned */ uint32_t max_lod: 8; /* Fixed point, 4.4, unsigned */ uint32_t lod_bias: 9; /* Fixed point, signed, 1.4.4 */ @@ -52,23 +63,20 @@ typedef struct __attribute__((__packed__)) { uint32_t min_mipfilter_2: 2; /* 0x3 for linear, 0x0 for nearest */ uint32_t min_img_filter_nearest: 1; uint32_t mag_img_filter_nearest: 1; - uint32_t wrap_s_clamp_to_edge: 1; - uint32_t wrap_s_clamp: 1; - uint32_t wrap_s_mirror_repeat: 1; - uint32_t wrap_t_clamp_to_edge: 1; - uint32_t wrap_t_clamp: 1; - uint32_t wrap_t_mirror_repeat: 1; - uint32_t unknown_2_2: 3; + uint32_t wrap_s: 3; + uint32_t wrap_t: 3; + uint32_t wrap_r: 3; uint32_t width: 13; uint32_t height: 13; - uint32_t unknown_3_1: 1; - uint32_t unknown_3_2: 15; + uint32_t depth: 13; - /* Word 4 */ - uint32_t unknown_4; + uint32_t border_red: 16; + uint32_t border_green: 16; + uint32_t border_blue: 16; + uint32_t border_alpha: 16; - /* Word 5 */ - uint32_t unknown_5; + /* Word 5 (last 3 bits) */ + uint32_t unknown_5_1: 3; /* Word 6-15 */ /* layout is in va[0] bit 13-14 */ @@ -93,7 +101,7 @@ typedef struct __attribute__((__packed__)) { void lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc, struct pipe_resource *prsc, unsigned first_level, unsigned last_level, - unsigned first_layer); + unsigned first_layer, unsigned mrt_idx); void lima_update_textures(struct lima_context *ctx); diff --git a/src/gallium/drivers/lima/lima_util.c b/src/gallium/drivers/lima/lima_util.c index dca9307c991..1587ac18f5c 100644 --- a/src/gallium/drivers/lima/lima_util.c +++ b/src/gallium/drivers/lima/lima_util.c @@ -29,6 +29,8 @@ #include "util/u_debug.h" #include "util/u_memory.h" +#include "util/box.h" +#include "pipe/p_state.h" #include "lima_util.h" #include "lima_parser.h" @@ -39,24 +41,6 @@ struct lima_dump { int id; }; -bool lima_get_absolute_timeout(uint64_t *timeout) -{ - struct timespec current; - uint64_t current_ns; - - if (*timeout == PIPE_TIMEOUT_INFINITE) - return true; - - if (clock_gettime(CLOCK_MONOTONIC, ¤t)) - return false; - - current_ns = ((uint64_t)current.tv_sec) * 1000000000ull; - current_ns += current.tv_nsec; - *timeout += current_ns; - - return true; -} - static void lima_dump_blob(FILE *fp, void *data, int size, bool is_float) { @@ -79,6 +63,13 @@ lima_dump_blob(FILE *fp, void *data, int size, bool is_float) } void +lima_dump_shader(struct lima_dump *dump, void *data, int size, bool is_frag) +{ + if (dump) + lima_parse_shader(dump->fp, (uint32_t *)data, size, is_frag); +} + +void lima_dump_vs_command_stream_print(struct lima_dump *dump, void *data, int size, uint32_t start) { @@ -175,3 +166,14 @@ _lima_dump_command_stream_print(struct lima_dump *dump, void *data, lima_dump_blob(dump->fp, data, size, is_float); } + +void +lima_damage_rect_union(struct pipe_scissor_state *rect, + unsigned minx, unsigned maxx, + unsigned miny, unsigned maxy) +{ + rect->minx = MIN2(rect->minx, minx); + rect->miny = MIN2(rect->miny, miny); + rect->maxx = MAX2(rect->maxx, maxx); + rect->maxy = MAX2(rect->maxy, maxy); +} diff --git a/src/gallium/drivers/lima/lima_util.h b/src/gallium/drivers/lima/lima_util.h index 3749523f3a1..56b441ecaa9 100644 --- a/src/gallium/drivers/lima/lima_util.h +++ b/src/gallium/drivers/lima/lima_util.h @@ -31,12 +31,11 @@ struct lima_dump; -bool lima_get_absolute_timeout(uint64_t *timeout); - struct lima_dump *lima_dump_create(void); struct lima_dump *lima_dump_next(struct lima_dump *dump); void lima_dump_free(struct lima_dump *dump); +void lima_dump_shader(struct lima_dump *dump, void *data, int size, bool is_frag); void lima_dump_vs_command_stream_print(struct lima_dump *dump, void *data, int size, uint32_t start); void lima_dump_plbu_command_stream_print(struct lima_dump *dump, void *data, @@ -54,4 +53,9 @@ void _lima_dump_command_stream_print(struct lima_dump *dump, void *data, _lima_dump_command_stream_print(dump, __VA_ARGS__); \ } while (0) +struct pipe_scissor_state; + +void lima_damage_rect_union(struct pipe_scissor_state *rect, + unsigned minx, unsigned maxx, + unsigned miny, unsigned maxy); #endif diff --git a/src/gallium/drivers/lima/meson.build b/src/gallium/drivers/lima/meson.build index 5bc6fbbf869..95a5094cc39 100644 --- a/src/gallium/drivers/lima/meson.build +++ b/src/gallium/drivers/lima/meson.build @@ -51,6 +51,7 @@ files_lima = files( 'ir/lima_nir_lower_uniform_to_scalar.c', 'ir/lima_nir_split_load_input.c', 'ir/lima_nir_split_loads.c', + 'ir/lima_nir_lower_txp.c', 'ir/lima_ir.h', @@ -81,6 +82,8 @@ files_lima = files( 'lima_format.h', 'lima_format.c', 'lima_gpu.h', + 'lima_blit.c', + 'lima_blit.h', ) lima_nir_algebraic_c = custom_target( @@ -88,11 +91,10 @@ lima_nir_algebraic_c = custom_target( input : 'ir/lima_nir_algebraic.py', output : 'lima_nir_algebraic.c', command : [ - prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + prog_python, '@INPUT@', '-p', dir_compiler_nir, ], capture : true, - depend_files : nir_algebraic_py, + depend_files : nir_algebraic_depends, ) liblima = static_library( @@ -119,7 +121,7 @@ lima_compiler = executable( 'standalone/glsl.cpp' ), include_directories : [ - inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, inc_compiler, + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, ], dependencies : [ idep_nir, @@ -141,7 +143,7 @@ lima_disasm = executable( 'standalone/lima_disasm.c', ), include_directories : [ - inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, inc_compiler, + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, ], dependencies : [ idep_mesautil, diff --git a/src/gallium/drivers/lima/standalone/glsl.cpp b/src/gallium/drivers/lima/standalone/glsl.cpp index 3cef68277ce..7b929e9b074 100644 --- a/src/gallium/drivers/lima/standalone/glsl.cpp +++ b/src/gallium/drivers/lima/standalone/glsl.cpp @@ -37,5 +37,5 @@ lima_do_glsl_optimizations(struct exec_list *ir) int st_glsl_type_size(const glsl_type *type, bool bindless) { - return type->count_attribute_slots(false); + return glsl_count_attribute_slots(type, false); } diff --git a/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c b/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c index f3e1fa68397..3e5971fe4ce 100644 --- a/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c +++ b/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c @@ -28,10 +28,10 @@ #include "main/mtypes.h" +#include "compiler/glsl_types.h" #include "compiler/glsl/standalone.h" #include "compiler/glsl/glsl_to_nir.h" #include "compiler/glsl/gl_nir.h" -#include "compiler/nir_types.h" #include "lima_context.h" #include "lima_program.h" @@ -112,7 +112,9 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) lima_do_glsl_optimizations(prog->_LinkedShaders[stage]->ir); - nir_shader *nir = glsl_to_nir(&local_ctx, prog, stage, nir_options); + nir_shader *nir = glsl_to_nir(&local_ctx.Const, prog, stage, nir_options); + + gl_nir_inline_functions(nir); /* required NIR passes: */ if (nir_options->lower_all_io_to_temps || @@ -135,7 +137,7 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) NIR_PASS_V(nir, nir_lower_var_copies); nir_print_shader(nir, stdout); NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true); - NIR_PASS_V(nir, nir_lower_atomics_to_ssbo); + NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, 0); nir_print_shader(nir, stdout); switch (stage) { @@ -214,6 +216,7 @@ main(int argc, char **argv) struct nir_lower_tex_options tex_options = { .lower_txp = ~0u, + .lower_invalid_implicit_lod = true, }; nir_shader *nir = load_glsl(1, filename, stage); diff --git a/src/gallium/drivers/lima/standalone/lima_disasm.c b/src/gallium/drivers/lima/standalone/lima_disasm.c index ee4460d5fc2..9c8278cddd9 100644 --- a/src/gallium/drivers/lima/standalone/lima_disasm.c +++ b/src/gallium/drivers/lima/standalone/lima_disasm.c @@ -166,7 +166,7 @@ main(int argc, char **argv) } char *filename = NULL; - filename = argv[n]; + filename = argv[argc - 1]; uint32_t size = 0; uint32_t *prog = extract_shader_binary(filename, &size, &is_frag); @@ -183,13 +183,13 @@ main(int argc, char **argv) do { ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *)bin; printf("@%6d: ", offset); - ppir_disassemble_instr(bin, offset); + ppir_disassemble_instr(bin, offset, stdout); bin += ctrl->count; offset += ctrl->count; size -= ctrl->count; } while (size); } else { - gpir_disassemble_program((gpir_codegen_instr *)prog, size / (sizeof(gpir_codegen_instr))); + gpir_disassemble_program((gpir_codegen_instr *)prog, size / (sizeof(gpir_codegen_instr)), stdout); } ralloc_free(prog); |