summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/lima
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/lima')
-rw-r--r-- src/gallium/drivers/lima/ci/deqp-lima-fails.txt | 59
-rw-r--r-- src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml | 35
-rw-r--r-- src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml | 4
-rw-r--r-- src/gallium/drivers/lima/ci/deqp-lima-skips.txt | 3
-rw-r--r-- src/gallium/drivers/lima/ci/gitlab-ci.yml | 46
-rw-r--r-- src/gallium/drivers/lima/ci/lima-fails.txt | 635
-rw-r--r-- src/gallium/drivers/lima/ci/lima-flakes.txt | 4
-rw-r--r-- src/gallium/drivers/lima/ci/lima-skips.txt | 91
-rw-r--r-- src/gallium/drivers/lima/drm-shim/meson.build | 2
-rw-r--r-- src/gallium/drivers/lima/ir/gp/codegen.c | 2
-rw-r--r-- src/gallium/drivers/lima/ir/gp/codegen.h | 2
-rw-r--r-- src/gallium/drivers/lima/ir/gp/disasm.c | 332
-rw-r--r-- src/gallium/drivers/lima/ir/gp/gpir.h | 7
-rw-r--r-- src/gallium/drivers/lima/ir/gp/nir.c | 136
-rw-r--r-- src/gallium/drivers/lima/ir/gp/node.c | 3
-rw-r--r-- src/gallium/drivers/lima/ir/gp/optimize.c | 4
-rw-r--r-- src/gallium/drivers/lima/ir/gp/reduce_scheduler.c | 17
-rw-r--r-- src/gallium/drivers/lima/ir/gp/regalloc.c | 5
-rw-r--r-- src/gallium/drivers/lima/ir/gp/scheduler.c | 4
-rw-r--r-- src/gallium/drivers/lima/ir/lima_ir.h | 6
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c | 26
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c | 59
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_lower_txp.c | 163
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c | 33
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_split_load_input.c | 148
-rw-r--r-- src/gallium/drivers/lima/ir/lima_nir_split_loads.c | 81
-rw-r--r-- src/gallium/drivers/lima/ir/pp/codegen.c | 78
-rw-r--r-- src/gallium/drivers/lima/ir/pp/codegen.h | 6
-rw-r--r-- src/gallium/drivers/lima/ir/pp/disasm.c | 374
-rw-r--r-- src/gallium/drivers/lima/ir/pp/instr.c | 19
-rw-r--r-- src/gallium/drivers/lima/ir/pp/liveness.c | 19
-rw-r--r-- src/gallium/drivers/lima/ir/pp/lower.c | 109
-rw-r--r-- src/gallium/drivers/lima/ir/pp/nir.c | 263
-rw-r--r-- src/gallium/drivers/lima/ir/pp/node.c | 13
-rw-r--r-- src/gallium/drivers/lima/ir/pp/node_to_instr.c | 26
-rw-r--r-- src/gallium/drivers/lima/ir/pp/ppir.h | 56
-rw-r--r-- src/gallium/drivers/lima/ir/pp/regalloc.c | 32
-rw-r--r-- src/gallium/drivers/lima/ir/pp/scheduler.c | 14
-rw-r--r-- src/gallium/drivers/lima/lima_blit.c | 319
-rw-r--r-- src/gallium/drivers/lima/lima_blit.h | 30
-rw-r--r-- src/gallium/drivers/lima/lima_bo.c | 2
-rw-r--r-- src/gallium/drivers/lima/lima_context.c | 23
-rw-r--r-- src/gallium/drivers/lima/lima_context.h | 12
-rw-r--r-- src/gallium/drivers/lima/lima_draw.c | 256
-rw-r--r-- src/gallium/drivers/lima/lima_format.h | 2
-rw-r--r-- src/gallium/drivers/lima/lima_gpu.h | 8
-rw-r--r-- src/gallium/drivers/lima/lima_job.c | 218
-rw-r--r-- src/gallium/drivers/lima/lima_job.h | 3
-rw-r--r-- src/gallium/drivers/lima/lima_parser.c | 115
-rw-r--r-- src/gallium/drivers/lima/lima_parser.h | 28
-rw-r--r-- src/gallium/drivers/lima/lima_program.c | 71
-rw-r--r-- src/gallium/drivers/lima/lima_resource.c | 205
-rw-r--r-- src/gallium/drivers/lima/lima_resource.h | 5
-rw-r--r-- src/gallium/drivers/lima/lima_screen.c | 91
-rw-r--r-- src/gallium/drivers/lima/lima_screen.h | 14
-rw-r--r-- src/gallium/drivers/lima/lima_state.c | 37
-rw-r--r-- src/gallium/drivers/lima/lima_texture.c | 112
-rw-r--r-- src/gallium/drivers/lima/lima_texture.h | 42
-rw-r--r-- src/gallium/drivers/lima/lima_util.c | 38
-rw-r--r-- src/gallium/drivers/lima/lima_util.h | 8
-rw-r--r-- src/gallium/drivers/lima/meson.build | 12
-rw-r--r-- src/gallium/drivers/lima/standalone/glsl.cpp | 2
-rw-r--r-- src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c | 9
-rw-r--r-- src/gallium/drivers/lima/standalone/lima_disasm.c | 6
64 files changed, 3127 insertions, 1457 deletions
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt b/src/gallium/drivers/lima/ci/deqp-lima-fails.txt
deleted file mode 100644
index 680b8f247fd..00000000000
--- a/src/gallium/drivers/lima/ci/deqp-lima-fails.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
-dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_neg_y_pos_z_and_pos_x_pos_y_neg_z,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_pos_y_pos_z_and_pos_x_neg_y_neg_z,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z,Fail
-dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.1,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.12,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.13,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.14,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.15,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.16,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.17,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.18,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.19,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.2,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.20,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.21,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.22,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.23,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.24,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.3,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.4,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.5,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.6,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.7,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.8,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.random.9,Fail
-dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.stencil,Fail
-dEQP-GLES2.functional.negative_api.shader.uniform_matrixfv_invalid_transpose,Fail
-dEQP-GLES2.functional.negative_api.texture.generatemipmap_zero_level_array_compressed,Fail
-dEQP-GLES2.functional.shaders.builtin_variable.frontfacing,Fail
-dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.matrix_subscript.mat4_dynamic_loop_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_dynamic_loop_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_dynamic_loop_read,Fail
-dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_dynamic_read,Fail
-dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_static_loop_read,Fail
-dEQP-GLES2.functional.shaders.indexing.varying_array.vec4_dynamic_loop_write_static_read,Fail
-dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2d_bias,Fail
-dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec4_bias,Fail
-dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_clamp_rgba8888,Fail
-dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_mirror_rgba8888,Fail
-dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_clamp_rgba8888,Fail
-dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_mirror_rgba8888,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear,Fail
-dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest,Fail
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml b/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml
new file mode 100644
index 00000000000..588edb95ab1
--- /dev/null
+++ b/src/gallium/drivers/lima/ci/deqp-lima-mali450-deqp.toml
@@ -0,0 +1,35 @@
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = ["/deqp/mustpass/gles2-main.txt"]
+tests_per_group = 250
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+version_check = "GL ES 2.0.*git"
+renderer_check = "Mali450"
+
+# wayland
+[[deqp]]
+deqp = "/deqp/modules/egl/deqp-egl-wayland"
+caselists = ["/deqp/mustpass/egl-main.txt"]
+tests_per_group = 250
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+prefix = "wayland-"
+
+# x11
+[[deqp]]
+deqp = "/deqp/modules/egl/deqp-egl-x11"
+caselists = ["/deqp/mustpass/egl-main.txt"]
+tests_per_group = 250
+deqp_args = [
+ "--deqp-surface-width=256", "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer", "--deqp-visibility=hidden",
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+]
+prefix = "x11-"
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml b/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml
new file mode 100644
index 00000000000..1cc878998b0
--- /dev/null
+++ b/src/gallium/drivers/lima/ci/deqp-lima-mali450-piglit.toml
@@ -0,0 +1,4 @@
+[[piglit]]
+piglit_folder = "/piglit"
+profile = "gpu"
+process_isolation = true
diff --git a/src/gallium/drivers/lima/ci/deqp-lima-skips.txt b/src/gallium/drivers/lima/ci/deqp-lima-skips.txt
deleted file mode 100644
index 824ea20af4c..00000000000
--- a/src/gallium/drivers/lima/ci/deqp-lima-skips.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# Note: skips lists for CI are just a list of lines that, when
-# non-zero-length and not starting with '#', will regex match to
-# delete lines from the test list. Be careful.
diff --git a/src/gallium/drivers/lima/ci/gitlab-ci.yml b/src/gallium/drivers/lima/ci/gitlab-ci.yml
index ee42f3c6397..680ed8c1f5a 100644
--- a/src/gallium/drivers/lima/ci/gitlab-ci.yml
+++ b/src/gallium/drivers/lima/ci/gitlab-ci.yml
@@ -1,15 +1,43 @@
-lima-mali450-test:arm64:
+.lima-rules:
+ stage: arm
+ rules:
+ - !reference [.test, rules]
+ - !reference [.lima-farm-rules, rules]
+ - !reference [.gl-rules, rules]
+ - changes:
+ - src/gallium/drivers/lima/**/*
+ - src/gallium/winsys/lima/**/*
+ - src/lima/**/*
+ when: on_success
+
+# 4 devices (2023-12-16)
+.lava-meson-gxl-s805x-libretech-ac:arm64:
+ variables:
+ DEVICE_TYPE: meson-gxl-s805x-libretech-ac
+ DTB: meson-gxl-s805x-libretech-ac
+ FDO_CI_CONCURRENT: 4
+ GPU_VERSION: lima
+ RUNNER_TAG: mesa-ci-x86-64-lava-lima
+
+lima-mali450-deqp:arm64:
extends:
- - .lava-test:arm64
+ - .lava-test-deqp:arm64
- .lima-rules
+ - .lava-meson-gxl-s805x-libretech-ac:arm64
variables:
- DEVICE_TYPE: meson-gxl-s805x-libretech-ac
- DTB: ${DEVICE_TYPE}
FDO_HTTP_CACHE_URI: ''
- GPU_VERSION: lima
- DEQP_PARALLEL: 4
- DEQP_EXPECTED_RENDERER: Mali450
+ HWCI_START_WESTON: 1
+ DEQP_SUITE: lima-mali450-deqp
VISIBILITY_GROUP: "mesa-ci"
- tags:
- - mesa-ci-x86-64-lava-lima
+lima-mali450-piglit:arm64:
+ extends:
+ - .lava-test-deqp:arm64
+ - .lima-rules
+ - .lava-meson-gxl-s805x-libretech-ac:arm64
+ variables:
+ DEQP_SUITE: lima-mali450-piglit
+ FDO_HTTP_CACHE_URI: ''
+ PIGLIT_PLATFORM: gbm
+ VISIBILITY_GROUP: "mesa-ci"
+ parallel: 2
diff --git a/src/gallium/drivers/lima/ci/lima-fails.txt b/src/gallium/drivers/lima/ci/lima-fails.txt
new file mode 100644
index 00000000000..ca73d800d81
--- /dev/null
+++ b/src/gallium/drivers/lima/ci/lima-fails.txt
@@ -0,0 +1,635 @@
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_neg_y_pos_z_and_pos_x_pos_y_neg_z,Fail
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_neg_x_pos_y_pos_z_and_pos_x_neg_y_neg_z,Fail
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z,Fail
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_two.clip_pos_x_pos_y_pos_z_and_neg_x_neg_y_neg_z,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.0,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.1,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.10,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.11,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.12,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.13,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.14,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.15,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.16,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.17,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.18,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.19,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.2,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.20,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.21,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.22,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.23,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.24,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.3,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.4,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.5,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.6,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.7,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.8,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.random.9,Fail
+dEQP-GLES2.functional.fragment_ops.depth_stencil.write_mask.stencil,Fail
+dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2d_bias,Fail
+dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec3_bias,Fail
+dEQP-GLES2.functional.shaders.texture_functions.fragment.texture2dproj_vec4_bias,Fail
+dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_vertex,Fail
+dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_vertex,Fail
+dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_vertex,Fail
+dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_clamp_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_linear_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_clamp_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.linear_mipmap_linear_nearest_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear,Fail
+dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest,Fail
+
+wayland-dEQP-EGL.functional.create_context.no_config,Fail
+wayland-dEQP-EGL.functional.image.modify.renderbuffer_depth16_renderbuffer_clear_depth,Fail
+wayland-dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Fail
+wayland-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Fail
+wayland-dEQP-EGL.functional.wide_color.window_fp16_default_colorspace,Fail
+
+x11-dEQP-EGL.functional.create_context.no_config,Fail
+x11-dEQP-EGL.functional.image.modify.renderbuffer_depth16_renderbuffer_clear_depth,Fail
+x11-dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Fail
+x11-dEQP-EGL.functional.render.multi_context.gles2.rgba8888_pbuffer,Fail
+x11-dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Fail
+x11-dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer,Fail
+x11-dEQP-EGL.functional.wide_color.pbuffer_8888_colorspace_srgb,Fail
+x11-dEQP-EGL.functional.wide_color.window_8888_colorspace_srgb,Fail
+
+shaders@glsl-arb-fragment-coord-conventions,Fail
+shaders@glsl-bug-110796,Fail
+shaders@glsl-fs-flat-color,Fail
+shaders@glsl-predication-on-large-array,Fail
+shaders@glsl-routing,Fail
+spec@arb_color_buffer_float@gl_rgba8-render,Fail
+spec@arb_color_buffer_float@gl_rgba8-render-sanity,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT NPOT,Fail
+spec@arb_depth_texture@texdepth,Fail
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index,Fail
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays,Fail
+spec@arb_es2_compatibility@fbo-blending-formats,Fail
+spec@arb_es2_compatibility@fbo-blending-formats@GL_RGB565,Fail
+spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-integer,Fail
+spec@arb_fragment_program@fdo38145,Fail
+spec@arb_fragment_program@fp-abs-01,Fail
+spec@arb_fragment_program_shadow@masked,Fail
+spec@arb_fragment_program_shadow@tex-shadow1d,Fail
+spec@arb_fragment_program_shadow@tex-shadow2d,Fail
+spec@arb_fragment_program_shadow@tex-shadow2drect,Fail
+spec@arb_fragment_program_shadow@txp-shadow1d,Fail
+spec@arb_fragment_program_shadow@txp-shadow2d,Fail
+spec@arb_fragment_program_shadow@txp-shadow2drect,Fail
+spec@arb_framebuffer_srgb@arb_framebuffer_srgb-srgb_conformance,Fail
+spec@arb_framebuffer_srgb@framebuffer-srgb,Fail
+spec@arb_occlusion_query@occlusion_query_conform,Fail
+spec@arb_occlusion_query@occlusion_query_conform@GetObjivAval_multi2,Fail
+spec@arb_pixel_buffer_object@cubemap npot pbo,Fail
+spec@arb_pixel_buffer_object@cubemap pbo,Fail
+spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail
+spec@arb_pixel_buffer_object@pbo-getteximage,Fail
+spec@arb_pixel_buffer_object@texsubimage-unpack pbo,Fail
+spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_provoking_vertex@arb-provoking-vertex-clipped-geometry-flatshading,Fail
+spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail
+spec@arb_sampler_objects@gl_ext_texture_srgb_decode,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-01,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-02,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-03,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-04,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-05,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-07,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-08,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-shadow2dgradarb-cumulative,Fail
+spec@arb_texture_cube_map@copyteximage cube,Fail
+spec@arb_texture_cube_map@copyteximage cube samples=2,Fail
+spec@arb_texture_cube_map@copyteximage cube samples=4,Fail
+spec@arb_texture_cube_map@cubemap,Fail
+spec@arb_texture_cube_map@cubemap npot,Fail
+spec@arb_texture_cube_map@cubemap-shader,Fail
+spec@arb_texture_rectangle@1-1-linear-texture,Fail
+spec@arb_texture_rectangle@copyteximage rect samples=2,Fail
+spec@arb_texture_rectangle@copyteximage rect samples=4,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-01,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-02,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-03,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-04,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-05,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-07,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect-08,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drect,Fail
+spec@arb_texture_rectangle@glsl-fs-shadow2drectproj,Fail
+spec@arb_texture_rg@execution@fs-shadow2d-red-01,Fail
+spec@arb_texture_rg@execution@fs-shadow2d-red-02,Fail
+spec@arb_texture_rg@execution@fs-shadow2d-red-03,Fail
+spec@arb_texture_rg@fbo-blending-formats,Fail
+spec@arb_texture_rg@fbo-blending-formats@GL_R16,Fail
+spec@arb_texture_rg@fbo-blending-formats@GL_R8,Fail
+spec@arb_texture_rg@fbo-blending-formats@GL_RG16,Fail
+spec@arb_texture_rg@fbo-blending-formats@GL_RG8,Fail
+spec@arb_texture_rg@fbo-blending-formats@GL_RG,Fail
+spec@arb_texture_rg@texwrap formats bordercolor,Fail
+spec@arb_texture_rg@texwrap formats bordercolor@GL_RG8- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG8- swizzled- border color only,Fail
+spec@arb_texture_storage@texture-storage@cube array texture,Fail
+spec@arb_vertex_program@arl,Fail
+spec@arb_vertex_program@big-param,Fail
+spec@arb_vertex_program@clip-plane-transformation arb,Fail
+spec@arb_vertex_program@instructions@arl,Fail
+spec@arb_vertex_program@vp-address-01,Fail
+spec@arb_vertex_program@vp-arl-constant-array,Fail
+spec@arb_vertex_program@vp-arl-constant-array-huge,Fail
+spec@arb_vertex_program@vp-arl-constant-array-huge-offset,Fail
+spec@arb_vertex_program@vp-arl-constant-array-huge-offset-neg,Fail
+spec@arb_vertex_program@vp-arl-constant-array-huge-relative-offset,Fail
+spec@arb_vertex_program@vp-arl-constant-array-huge-varying,Fail
+spec@arb_vertex_program@vp-arl-constant-array-varying,Fail
+spec@arb_vertex_program@vp-arl-env-array,Fail
+spec@arb_vertex_program@vp-arl-local-array,Fail
+spec@arb_vertex_program@vp-arl-neg-array-2,Fail
+spec@arb_vertex_program@vp-arl-neg-array,Fail
+spec@ati_fragment_shader@ati_fragment_shader-render-default,Fail
+spec@ati_fragment_shader@ati_fragment_shader-render-notexture,Fail
+spec@ati_fragment_shader@ati_fragment_shader-render-sources,Fail
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@dlist,Fail
+spec@ext_framebuffer_multisample@renderbuffer-samples,Fail
+spec@ext_framebuffer_multisample@samples,Fail
+spec@ext_framebuffer_object@ext_framebuffer_object-error-handling,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@3,Fail
+spec@ext_framebuffer_object@fbo-blending-formats,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_R3_G3_B2,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB10,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB12,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB16,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB5,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB8,Fail
+spec@ext_framebuffer_object@fbo-blending-formats@GL_RGB,Fail
+spec@ext_framebuffer_object@fbo-cubemap,Fail
+spec@ext_framebuffer_object@fbo-depth-sample-compare,Fail
+spec@ext_framebuffer_object@fbo-maxsize,Fail
+spec@ext_framebuffer_object@fbo-scissor-bitmap,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-copypixels,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-copypixels,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-copypixels,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-copypixels,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail
+spec@ext_packed_depth_stencil@depth_stencil texture,Fail
+spec@ext_packed_depth_stencil@fbo-blit-d24s8,Fail
+spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-blit,Fail
+spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-copypixels,Fail
+spec@ext_provoking_vertex@provoking-vertex,Fail
+spec@ext_texture_format_bgra8888@api-errors,Fail
+spec@ext_texture_lod_bias@lodbias,Fail
+spec@ext_texture_srgb@tex-srgb,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8_ALPHA8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB_ALPHA- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor-swizzled@GL_COMPRESSED_SRGB- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor@GL_COMPRESSED_SRGB_ALPHA- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc bordercolor@GL_COMPRESSED_SRGB- border color only,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats-s3tc@GL_COMPRESSED_SRGB,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-atan-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-atan-vec2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-atan-vec3-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-atan-vec4-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-degrees-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-degrees-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-dot-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-dot-vec4-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-fract-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-inversesqrt-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-length-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log2-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-mix-float-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-mod-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-int-int,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec2-ivec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec3-ivec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec4-int,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-ivec4-ivec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-mat3-mat3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-mat4-mat4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-div-vec3-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-assign-mult-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-mat4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-float-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-int,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-ivec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-int-ivec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec2-ivec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec3-ivec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec4-int,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-ivec4-ivec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-mat3-mat3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-mat4-mat4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-div-vec3-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-op-mult-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-pow-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-radians-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-sin-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-smoothstep-float-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-tan-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec2-ivec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec3-ivec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-assign-div-ivec4-ivec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec2-ivec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec3-ivec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-op-div-ivec4-ivec4,Fail
+spec@glsl-1.10@execution@clipping@clip-plane-transformation clipvert_pos,Fail
+spec@glsl-1.10@execution@clipping@clip-plane-transformation fixed,Fail
+spec@glsl-1.10@execution@clipping@clip-plane-transformation pos_clipvert,Fail
+spec@glsl-1.10@execution@derivatives@glsl-derivs-abs-sign,Fail
+spec@glsl-1.10@execution@derivatives@glsl-derivs-swizzle,Fail
+spec@glsl-1.10@execution@glsl-vs-large-uniform-array,Fail
+spec@glsl-1.10@execution@glsl-vs-uniform-array-4,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-flat-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backcolor-smooth-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-flat-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_backsecondarycolor-smooth-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-flat-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontcolor-smooth-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-fixed,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-none,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-flat-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-gl_frontsecondarycolor-smooth-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-other-flat-vertex,Fail
+spec@glsl-1.10@execution@interpolation@interpolation-none-other-smooth-vertex,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-01,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-02,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-03,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-04,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-05,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-07,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-08,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d-bias,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1d,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1dproj-bias,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow1dproj,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-01,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-02,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-03,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-04,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-05,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-07,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-08,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-bias,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d-clamp-z,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2d,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj-bias,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-shadow2dproj,Fail
+spec@glsl-1.10@execution@temp-array-indexing@glsl-vs-giant-temp-array,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-index-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat2-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-index-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat3-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-index-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-array-mat4-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat2-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat3-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@fs-temp-mat4-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-index-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat2-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-index-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-array-mat3-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat2-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat3-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-col-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-col-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-row-wr,Fail
+spec@glsl-1.10@execution@variable-indexing@vs-varying-mat4-wr,Fail
+spec@glsl-1.10@glsl-fs-discard-only,Fail
+spec@glsl-1.10@linker@glsl-link-varyings-3,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat2x4-mat2x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat3x2-mat3x2,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat3x4-mat3x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-assign-div-mat4x3-mat4x3,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat2x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat3x2,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat3x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-float-mat4x3,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat2x4-mat2x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x2-mat3x2,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x4-mat3x4,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat4x3-mat4x3,Fail
+spec@glsl-1.20@execution@built-in-functions@fs-op-mult-mat4x3-mat3x4,Fail
+spec@glsl-1.20@execution@clipping@fixed-clip-enables,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-reject,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-enables,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-equal-to-position,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-homogeneity,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-primitives,Fail
+spec@glsl-1.20@execution@fs-underflow-mul-compare-zero,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-index-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat2-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-index-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat3-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat2-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat3-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-mat4-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-index-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat2-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-index-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-array-mat3-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat2-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat3-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-col-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-varying-mat4-wr,Fail
+spec@intel_performance_query@intel_performance_query-issue_2235,Fail
+spec@khr_texture_compression_astc@basic-gles,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@oes_point_sprite@arb_point_sprite-checkerboard_gles1,Fail
+spec@!opengl 1.0@gl-1.0-dlist-bitmap,Fail
+spec@!opengl 1.0@gl-1.0-dlist-materials,Fail
+spec@!opengl 1.0@gl-1.0-dlist-shademodel,Fail
+spec@!opengl 1.0@gl-1.0-drawbuffer-modes,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_AND,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_AND_INVERTED,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_AND_REVERSE,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_CLEAR,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_COPY_INVERTED,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_EQUIV,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_INVERT,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_NAND,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_NOOP,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_NOR,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_OR,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_OR_INVERTED,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_OR_REVERSE,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_SET,Fail
+spec@!opengl 1.0@gl-1.0-logicop@GL_XOR,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@gl-1.0-ortho-pos,Fail
+spec@!opengl 1.0@gl-1.0-rastercolor,Fail
+spec@!opengl 1.0@gl-1.0-scissor-bitmap,Fail
+spec@!opengl 1.0@gl-1.0-swapbuffers-behavior,Fail
+spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail
+spec@!opengl 1.1@gl-1.1-xor-copypixels,Fail
+spec@!opengl 1.1@gl-1.1-xor,Fail
+spec@!opengl 1.1@line-flat-clip-color,Fail
+spec@!opengl 1.1@linestipple@Factor 2x,Fail
+spec@!opengl 1.1@linestipple@Factor 3x,Fail
+spec@!opengl 1.1@linestipple,Fail
+spec@!opengl 1.1@linestipple@Line loop,Fail
+spec@!opengl 1.1@linestipple@Line strip,Fail
+spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@teximage-scale-bias,Fail
+spec@!opengl 1.1@texsubimage,Fail
+spec@!opengl 1.1@texsubimage-unpack,Fail
+spec@!opengl 1.1@user-clip,Fail
+spec@!opengl 1.3@gl-1.3-texture-env,Fail
+spec@!opengl 1.4@copy-pixels,Fail
+spec@!opengl 1.4@gl-1.4-polygon-offset,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+spec@!opengl 2.0@early-z,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
+spec@!opengl 2.0@vertex-program-two-side back front2 back2,Fail
+spec@!opengl 2.0@vertex-program-two-side back front2 back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side back front2,Fail
+spec@!opengl 2.0@vertex-program-two-side back front2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back front2 back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back front2 back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back front2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back front2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled back@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front back back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front back back2@vs and fs,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front front2 back2,Fail
+spec@!opengl 2.0@vertex-program-two-side enabled front front2 back2@vs and fs,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_bitmap,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl es 2.0@glsl-fs-pointcoord,Fail
+
+# see https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/730
+# and https://gitlab.freedesktop.org/mesa/mesa/-/issues/7208
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
+spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
+
+# Expects that some format/internal_format combinations should be supported when they currently aren't.
+spec@arb_clear_texture@arb_clear_texture-supported-formats,Fail
+
+# Precision issue when lowering GL_RGB16 and GL_RGBA16
+spec@arb_clear_texture@arb_clear_texture-sized-formats,Fail
+
+spec@!opengl 1.1@line-smooth-stipple,Fail
+
+# bookworm update
+spec@ext_framebuffer_multisample@renderbufferstorage-samples,Fail
+
+# remove this after https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/843
+# is merged and piglit is updated
+spec@glsl-1.10@execution@glsl-1.10-built-in-uniform-state,Fail
+
+x11-dEQP-EGL.functional.wide_color.pbuffer_888_colorspace_srgb,Fail
+x11-dEQP-EGL.functional.wide_color.window_888_colorspace_srgb,Fail
diff --git a/src/gallium/drivers/lima/ci/lima-flakes.txt b/src/gallium/drivers/lima/ci/lima-flakes.txt
new file mode 100644
index 00000000000..403fc9c63cb
--- /dev/null
+++ b/src/gallium/drivers/lima/ci/lima-flakes.txt
@@ -0,0 +1,4 @@
+# dEQP error: terminate called after throwing an instance of 'tcu::TestError'
+# dEQP error: what(): Runtime check failed: '!m_requiresRestart' at teglGLES2SharingThreadedTests.cpp:2271
+x11-dEQP-EGL.functional.sharing.gles2.multithread.random.programs.link.19
+wayland-dEQP-EGL.functional.sharing.gles2.multithread.random.programs.link.19
diff --git a/src/gallium/drivers/lima/ci/lima-skips.txt b/src/gallium/drivers/lima/ci/lima-skips.txt
new file mode 100644
index 00000000000..497abcb8ef0
--- /dev/null
+++ b/src/gallium/drivers/lima/ci/lima-skips.txt
@@ -0,0 +1,91 @@
+# Note: a CI skips list is just a list of lines. Every line that is
+# non-empty and does not start with '#' is used as a regex, and any
+# matching entries are deleted from the test list. Be careful.
+
+# deqp-egl skips
+# slow
+dEQP-EGL.functional.multicontext
+
+# piglit skips
+gles3
+glsl-1.3
+glsl-1.4
+glsl-1.5
+glsl-3
+glsl-4
+glsl-es-3
+opengl 3
+opengl 4
+opengl es 3
+glx@
+
+# dmesg-fail
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count
+spec@!opengl 1.4@triangle-rasterization-overdraw
+
+# oom
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.2@tex3d-maxsize
+
+# Allocates >1GB of CPU memory and even more GPU memory, then OOMs.
+ppgtt_memory_alignment
+
+# slow
+shaders@glsl-predication-on-large-array
+spec@glsl-1.10@execution@temp-array-indexing@glsl-fs-giant-temp-array
+
+# timeout
+shaders@glsl-uniform-interstage-limits@subdivide 5
+shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks
+
+# very large list of fails
+spec@!opengl 1.1@clipflat
+
+# large lists of skips with "Failed to create waffle_context for OpenGL [34].x" errors
+spec@amd_shader_trinary_minmax
+spec@arb_bindless_texture
+spec@arb_compute_shader
+spec@arb_compute_variable_group_size
+spec@arb_direct_state_access
+spec@arb_draw_indirect
+spec@arb_enhanced_layouts
+spec@arb_es3_compatibility
+spec@arb_explicit_uniform_location
+spec@arb_geometry_shader4
+spec@arb_gl_spirv
+spec@arb_gpu_shader5
+spec@arb_gpu_shader_fp64
+spec@arb_gpu_shader_int64
+spec@arb_pipeline_statistics_query
+spec@arb_program_interface_query
+spec@arb_sample_shading
+spec@arb_shader_atomic_counters
+spec@arb_shader_draw_parameters
+spec@arb_shader_image_load_store
+spec@arb_shader_precision
+spec@arb_shader_storage_buffer_object
+spec@arb_shader_texture_image_samples
+spec@arb_sparse_buffer
+spec@arb_stencil_texturing
+spec@arb_tessellation_shader
+spec@arb_texture_barrier
+spec@arb_texture_buffer_object
+spec@arb_texture_cube_map_array
+spec@arb_texture_gather
+spec@arb_texture_stencil8
+spec@arb_texture_view
+spec@arb_transform_feedback3
+spec@arb_uniform_buffer_object
+spec@arb_vertex_attrib_64bit
+spec@arb_viewport_array
+spec@ext_shader_samples_identical
+spec@ext_texture_array
+spec@ext_transform_feedback
+spec@intel_conservative_rasterization
+spec@intel_shader_integer_functions2
+spec@nv_alpha_to_coverage_dither_control
+spec@nv_compute_shader_derivatives
+spec@nv_shader_atomic_int64
diff --git a/src/gallium/drivers/lima/drm-shim/meson.build b/src/gallium/drivers/lima/drm-shim/meson.build
index a978d3505ba..db1806064bb 100644
--- a/src/gallium/drivers/lima/drm-shim/meson.build
+++ b/src/gallium/drivers/lima/drm-shim/meson.build
@@ -20,7 +20,7 @@
# IN THE SOFTWARE.
liblima_noop_drm_shim = shared_library(
- ['lima_noop_drm_shim'],
+ 'lima_noop_drm_shim',
'lima_noop.c',
include_directories: [inc_include, inc_src],
dependencies: dep_drm_shim,
diff --git a/src/gallium/drivers/lima/ir/gp/codegen.c b/src/gallium/drivers/lima/ir/gp/codegen.c
index d9a46f86a90..aa0a0496b06 100644
--- a/src/gallium/drivers/lima/ir/gp/codegen.c
+++ b/src/gallium/drivers/lima/ir/gp/codegen.c
@@ -608,7 +608,7 @@ bool gpir_codegen_prog(gpir_compiler *comp)
if (lima_debug & LIMA_DEBUG_GP) {
gpir_codegen_print_prog(comp);
- gpir_disassemble_program(code, num_instr);
+ gpir_disassemble_program(code, num_instr, stdout);
}
return true;
diff --git a/src/gallium/drivers/lima/ir/gp/codegen.h b/src/gallium/drivers/lima/ir/gp/codegen.h
index d24b31b41f7..f6bf4eb1923 100644
--- a/src/gallium/drivers/lima/ir/gp/codegen.h
+++ b/src/gallium/drivers/lima/ir/gp/codegen.h
@@ -161,6 +161,6 @@ typedef struct __attribute__((__packed__)) {
unsigned branch_target : 8;
} gpir_codegen_instr;
-void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr);
+void gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr, FILE *fp);
#endif
diff --git a/src/gallium/drivers/lima/ir/gp/disasm.c b/src/gallium/drivers/lima/ir/gp/disasm.c
index bc0ce3bec4d..eb15fdb5e1a 100644
--- a/src/gallium/drivers/lima/ir/gp/disasm.c
+++ b/src/gallium/drivers/lima/ir/gp/disasm.c
@@ -47,9 +47,9 @@ static const gpir_codegen_store_src gp_unit_to_store_src[num_units] = {
};
static void
-print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index)
+print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index, FILE *fp)
{
- printf("^%u", cur_dest_index + unit);
+ fprintf(fp, "^%u", cur_dest_index + unit);
gpir_codegen_store_src src = gp_unit_to_store_src[unit];
@@ -59,54 +59,54 @@ print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index)
/* Temporary stores ignore the address, and always use whatever's
* stored in address register 0.
*/
- printf("/t[addr0]");
+ fprintf(fp, "/t[addr0]");
} else {
if (instr->store0_varying)
- printf("/v");
+ fprintf(fp, "/v");
else
- printf("/$");
- printf("%u", instr->store0_addr);
+ fprintf(fp, "/$");
+ fprintf(fp, "%u", instr->store0_addr);
}
- printf(".");
+ fprintf(fp, ".");
if (instr->store0_src_x == src)
- printf("x");
+ fprintf(fp, "x");
if (instr->store0_src_y == src)
- printf("y");
+ fprintf(fp, "y");
}
if (instr->store1_src_z == src ||
instr->store1_src_w == src) {
if (instr->store1_temporary) {
- printf("/t[addr0]");
+ fprintf(fp, "/t[addr0]");
} else {
if (instr->store1_varying)
- printf("/v");
+ fprintf(fp, "/v");
else
- printf("/$");
- printf("%u", instr->store1_addr);
+ fprintf(fp, "/$");
+ fprintf(fp, "%u", instr->store1_addr);
}
- printf(".");
+ fprintf(fp, ".");
if (instr->store1_src_z == src)
- printf("z");
+ fprintf(fp, "z");
if (instr->store1_src_w == src)
- printf("w");
+ fprintf(fp, "w");
}
if (unit == unit_complex) {
switch (instr->complex_op) {
case gpir_codegen_complex_op_temp_store_addr:
- printf("/addr0");
+ fprintf(fp, "/addr0");
break;
case gpir_codegen_complex_op_temp_load_addr_0:
- printf("/addr1");
+ fprintf(fp, "/addr1");
break;
case gpir_codegen_complex_op_temp_load_addr_1:
- printf("/addr2");
+ fprintf(fp, "/addr2");
break;
case gpir_codegen_complex_op_temp_load_addr_2:
- printf("/addr3");
+ fprintf(fp, "/addr3");
break;
default:
break;
@@ -117,14 +117,14 @@ print_dest(gpir_codegen_instr *instr, gp_unit unit, unsigned cur_dest_index)
static void
print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned cur_dest_index)
+ unsigned cur_dest_index, FILE *fp)
{
switch (src) {
case gpir_codegen_src_attrib_x:
case gpir_codegen_src_attrib_y:
case gpir_codegen_src_attrib_z:
case gpir_codegen_src_attrib_w:
- printf("%c%d.%c", instr->register0_attribute ? 'a' : '$',
+ fprintf(fp, "%c%d.%c", instr->register0_attribute ? 'a' : '$',
instr->register0_addr, "xyzw"[src - gpir_codegen_src_attrib_x]);
break;
@@ -132,7 +132,7 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
case gpir_codegen_src_register_y:
case gpir_codegen_src_register_z:
case gpir_codegen_src_register_w:
- printf("$%d.%c", instr->register1_addr,
+ fprintf(fp, "$%d.%c", instr->register1_addr,
"xyzw"[src - gpir_codegen_src_register_x]);
break;
@@ -140,54 +140,54 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
case gpir_codegen_src_unknown_1:
case gpir_codegen_src_unknown_2:
case gpir_codegen_src_unknown_3:
- printf("unknown%d", src - gpir_codegen_src_unknown_0);
+ fprintf(fp, "unknown%d", src - gpir_codegen_src_unknown_0);
break;
case gpir_codegen_src_load_x:
case gpir_codegen_src_load_y:
case gpir_codegen_src_load_z:
case gpir_codegen_src_load_w:
- printf("t[%d", instr->load_addr);
+ fprintf(fp, "t[%d", instr->load_addr);
switch (instr->load_offset) {
case gpir_codegen_load_off_ld_addr_0:
- printf("+addr1");
+ fprintf(fp, "+addr1");
break;
case gpir_codegen_load_off_ld_addr_1:
- printf("+addr2");
+ fprintf(fp, "+addr2");
break;
case gpir_codegen_load_off_ld_addr_2:
- printf("+addr3");
+ fprintf(fp, "+addr3");
break;
case gpir_codegen_load_off_none:
break;
default:
- printf("+unk%d", instr->load_offset);
+ fprintf(fp, "+unk%d", instr->load_offset);
}
- printf("].%c", "xyzw"[src - gpir_codegen_src_load_x]);
+ fprintf(fp, "].%c", "xyzw"[src - gpir_codegen_src_load_x]);
break;
case gpir_codegen_src_p1_acc_0:
- printf("^%d", cur_dest_index - 1 * num_units + unit_acc_0);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_acc_0);
break;
case gpir_codegen_src_p1_acc_1:
- printf("^%d", cur_dest_index - 1 * num_units + unit_acc_1);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_acc_1);
break;
case gpir_codegen_src_p1_mul_0:
- printf("^%d", cur_dest_index - 1 * num_units + unit_mul_0);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_mul_0);
break;
case gpir_codegen_src_p1_mul_1:
- printf("^%d", cur_dest_index - 1 * num_units + unit_mul_1);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_mul_1);
break;
case gpir_codegen_src_p1_pass:
- printf("^%d", cur_dest_index - 1 * num_units + unit_pass);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_pass);
break;
case gpir_codegen_src_unused:
- printf("unused");
+ fprintf(fp, "unused");
break;
case gpir_codegen_src_p1_complex: /* Also ident */
@@ -195,48 +195,48 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
case unit_acc_0:
case unit_acc_1:
if (unit_src_num == 1) {
- printf("0");
+ fprintf(fp, "0");
return;
}
break;
case unit_mul_0:
case unit_mul_1:
if (unit_src_num == 1) {
- printf("1");
+ fprintf(fp, "1");
return;
}
break;
default:
break;
}
- printf("^%d", cur_dest_index - 1 * num_units + unit_complex);
+ fprintf(fp, "^%d", cur_dest_index - 1 * num_units + unit_complex);
break;
case gpir_codegen_src_p2_pass:
- printf("^%d", cur_dest_index - 2 * num_units + unit_pass);
+ fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_pass);
break;
case gpir_codegen_src_p2_acc_0:
- printf("^%d", cur_dest_index - 2 * num_units + unit_acc_0);
+ fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_acc_0);
break;
case gpir_codegen_src_p2_acc_1:
- printf("^%d", cur_dest_index - 2 * num_units + unit_acc_1);
+ fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_acc_1);
break;
case gpir_codegen_src_p2_mul_0:
- printf("^%d", cur_dest_index - 2 * num_units + unit_mul_0);
+ fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_mul_0);
break;
case gpir_codegen_src_p2_mul_1:
- printf("^%d", cur_dest_index - 2 * num_units + unit_mul_1);
+ fprintf(fp, "^%d", cur_dest_index - 2 * num_units + unit_mul_1);
break;
case gpir_codegen_src_p1_attrib_x:
case gpir_codegen_src_p1_attrib_y:
case gpir_codegen_src_p1_attrib_z:
case gpir_codegen_src_p1_attrib_w:
- printf("%c%d.%c", prev_instr->register0_attribute ? 'a' : '$',
+ fprintf(fp, "%c%d.%c", prev_instr->register0_attribute ? 'a' : '$',
prev_instr->register0_addr,
"xyzw"[src - gpir_codegen_src_p1_attrib_x]);
break;
@@ -245,7 +245,7 @@ print_src(gpir_codegen_src src, gp_unit unit, unsigned unit_src_num,
static bool
print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned cur_dest_index)
+ unsigned cur_dest_index, FILE *fp)
{
bool printed = false;
@@ -255,113 +255,113 @@ print_mul(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
if (instr->mul0_src0 != gpir_codegen_src_unused &&
instr->mul0_src1 != gpir_codegen_src_unused) {
printed = true;
- printf("\t");
+ fprintf(fp, "\t");
if (instr->mul0_src1 == gpir_codegen_src_ident &&
!instr->mul0_neg) {
- printf("mov.m0 ");
- print_dest(instr, unit_mul_0, cur_dest_index);
- printf(" ");
+ fprintf(fp, "mov.m0 ");
+ print_dest(instr, unit_mul_0, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
} else {
if (instr->mul_op == gpir_codegen_mul_op_complex2)
- printf("complex2.m0 ");
+ fprintf(fp, "complex2.m0 ");
else
- printf("mul.m0 ");
+ fprintf(fp, "mul.m0 ");
- print_dest(instr, unit_mul_0, cur_dest_index);
- printf(" ");
+ print_dest(instr, unit_mul_0, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
if (instr->mul0_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
}
- printf("\n");
+ fprintf(fp, "\n");
}
if (instr->mul1_src0 != gpir_codegen_src_unused &&
instr->mul1_src1 != gpir_codegen_src_unused) {
printed = true;
- printf("\t");
+ fprintf(fp, "\t");
if (instr->mul1_src1 == gpir_codegen_src_ident &&
!instr->mul1_neg) {
- printf("mov.m1 ");
- print_dest(instr, unit_mul_1, cur_dest_index);
- printf(" ");
+ fprintf(fp, "mov.m1 ");
+ print_dest(instr, unit_mul_1, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
} else {
- printf("mul.m1 ");
- print_dest(instr, unit_mul_1, cur_dest_index);
- printf(" ");
+ fprintf(fp, "mul.m1 ");
+ print_dest(instr, unit_mul_1, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
if (instr->mul1_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->mul1_src1, unit_mul_0, 1, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
}
- printf("\n");
+ fprintf(fp, "\n");
}
break;
case gpir_codegen_mul_op_complex1:
printed = true;
- printf("\tcomplex1.m01 ");
- print_dest(instr, unit_mul_0, cur_dest_index);
- printf(" ");
+ fprintf(fp, "\tcomplex1.m01 ");
+ print_dest(instr, unit_mul_0, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr,
- cur_dest_index);
- printf("\n");
+ cur_dest_index, fp);
+ fprintf(fp, "\n");
break;
case gpir_codegen_mul_op_select:
printed = true;
- printf("\tsel.m01 ");
- print_dest(instr, unit_mul_0, cur_dest_index);
- printf(" ");
+ fprintf(fp, "\tsel.m01 ");
+ print_dest(instr, unit_mul_0, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
- cur_dest_index);
- printf("\n");
+ cur_dest_index, fp);
+ fprintf(fp, "\n");
break;
default:
printed = true;
- printf("\tunknown%u.m01 ", instr->mul_op);
- print_dest(instr, unit_mul_0, cur_dest_index);
- printf(" ");
+ fprintf(fp, "\tunknown%u.m01 ", instr->mul_op);
+ print_dest(instr, unit_mul_0, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src0, unit_mul_0, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul0_src1, unit_mul_0, 1, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src0, unit_mul_1, 0, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->mul1_src1, unit_mul_1, 1, instr, prev_instr,
- cur_dest_index);
- printf("\n");
+ cur_dest_index, fp);
+ fprintf(fp, "\n");
break;
}
@@ -393,14 +393,14 @@ static const acc_op_info acc_op_infos[8] = {
static bool
print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned cur_dest_index)
+ unsigned cur_dest_index, FILE *fp)
{
bool printed = false;
const acc_op_info op = acc_op_infos[instr->acc_op];
if (instr->acc0_src0 != gpir_codegen_src_unused) {
printed = true;
- printf("\t");
+ fprintf(fp, "\t");
acc_op_info acc0_op = op;
if (instr->acc0_src1 == gpir_codegen_src_ident &&
instr->acc0_src1_neg) {
@@ -410,30 +410,30 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
}
if (acc0_op.name)
- printf("%s.a0 ", acc0_op.name);
+ fprintf(fp, "%s.a0 ", acc0_op.name);
else
- printf("op%u.a0 ", instr->acc_op);
+ fprintf(fp, "op%u.a0 ", instr->acc_op);
- print_dest(instr, unit_acc_0, cur_dest_index);
- printf(" ");
+ print_dest(instr, unit_acc_0, cur_dest_index, fp);
+ fprintf(fp, " ");
if (instr->acc0_src0_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->acc0_src0, unit_acc_0, 0, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
if (acc0_op.srcs > 1) {
- printf(" ");
+ fprintf(fp, " ");
if (instr->acc0_src1_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->acc0_src1, unit_acc_0, 1, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
}
- printf("\n");
+ fprintf(fp, "\n");
}
if (instr->acc1_src0 != gpir_codegen_src_unused) {
printed = true;
- printf("\t");
+ fprintf(fp, "\t");
acc_op_info acc1_op = op;
if (instr->acc1_src1 == gpir_codegen_src_ident &&
instr->acc1_src1_neg) {
@@ -443,25 +443,25 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
}
if (acc1_op.name)
- printf("%s.a1 ", acc1_op.name);
+ fprintf(fp, "%s.a1 ", acc1_op.name);
else
- printf("op%u.a1 ", instr->acc_op);
+ fprintf(fp, "op%u.a1 ", instr->acc_op);
- print_dest(instr, unit_acc_1, cur_dest_index);
- printf(" ");
+ print_dest(instr, unit_acc_1, cur_dest_index, fp);
+ fprintf(fp, " ");
if (instr->acc1_src0_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->acc1_src0, unit_acc_1, 0, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
if (acc1_op.srcs > 1) {
- printf(" ");
+ fprintf(fp, " ");
if (instr->acc1_src1_neg)
- printf("-");
+ fprintf(fp, "-");
print_src(instr->acc1_src1, unit_acc_1, 1, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
}
- printf("\n");
+ fprintf(fp, "\n");
}
return printed;
@@ -469,131 +469,129 @@ print_acc(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
static bool
print_pass(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned cur_dest_index)
+ unsigned cur_dest_index, FILE *fp)
{
if (instr->pass_src == gpir_codegen_src_unused)
return false;
- printf("\t");
+ fprintf(fp, "\t");
switch (instr->pass_op) {
case gpir_codegen_pass_op_pass:
- printf("mov.p ");
+ fprintf(fp, "mov.p ");
break;
case gpir_codegen_pass_op_preexp2:
- printf("preexp2.p ");
+ fprintf(fp, "preexp2.p ");
break;
case gpir_codegen_pass_op_postlog2:
- printf("postlog2.p ");
+ fprintf(fp, "postlog2.p ");
break;
case gpir_codegen_pass_op_clamp:
- printf("clamp.p ");
+ fprintf(fp, "clamp.p ");
break;
default:
- printf("unk%u.p ", instr->pass_op);
+ fprintf(fp, "unk%u.p ", instr->pass_op);
}
- print_dest(instr, unit_pass, cur_dest_index);
- printf(" ");
+ print_dest(instr, unit_pass, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->pass_src, unit_pass, 0, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
if (instr->pass_op == gpir_codegen_pass_op_clamp) {
- printf(" ");
+ fprintf(fp, " ");
print_src(gpir_codegen_src_load_x, unit_pass, 1, instr, prev_instr,
- cur_dest_index);
- printf(" ");
+ cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(gpir_codegen_src_load_y, unit_pass, 2, instr, prev_instr,
- cur_dest_index);
+ cur_dest_index, fp);
}
- printf("\n");
+ fprintf(fp, "\n");
return true;
}
static bool
print_complex(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned cur_dest_index)
+ unsigned cur_dest_index, FILE *fp)
{
if (instr->complex_src == gpir_codegen_src_unused)
return false;
- printf("\t");
+ fprintf(fp, "\t");
switch (instr->complex_op) {
case gpir_codegen_complex_op_nop:
return false;
case gpir_codegen_complex_op_exp2:
- printf("exp2.c ");
+ fprintf(fp, "exp2.c ");
break;
case gpir_codegen_complex_op_log2:
- printf("log2.c ");
+ fprintf(fp, "log2.c ");
break;
case gpir_codegen_complex_op_rsqrt:
- printf("rsqrt.c ");
+ fprintf(fp, "rsqrt.c ");
break;
case gpir_codegen_complex_op_rcp:
- printf("rcp.c ");
+ fprintf(fp, "rcp.c ");
break;
case gpir_codegen_complex_op_pass:
case gpir_codegen_complex_op_temp_store_addr:
case gpir_codegen_complex_op_temp_load_addr_0:
case gpir_codegen_complex_op_temp_load_addr_1:
case gpir_codegen_complex_op_temp_load_addr_2:
- printf("mov.c ");
+ fprintf(fp, "mov.c ");
break;
default:
- printf("unk%u.c ", instr->complex_op);
+ fprintf(fp, "unk%u.c ", instr->complex_op);
}
- print_dest(instr, unit_complex, cur_dest_index);
- printf(" ");
+ print_dest(instr, unit_complex, cur_dest_index, fp);
+ fprintf(fp, " ");
print_src(instr->complex_src, unit_complex, 0, instr, prev_instr,
- cur_dest_index);
- printf("\n");
+ cur_dest_index, fp);
+ fprintf(fp, "\n");
return true;
}
static void
print_instr(gpir_codegen_instr *instr, gpir_codegen_instr *prev_instr,
- unsigned instr_number, unsigned cur_dest_index)
+ unsigned instr_number, unsigned cur_dest_index, FILE *fp)
{
bool printed = false;
- printf("%03d:", instr_number);
- printed |= print_acc(instr, prev_instr, cur_dest_index);
- printed |= print_mul(instr, prev_instr, cur_dest_index);
- printed |= print_complex(instr, prev_instr, cur_dest_index);
- printed |= print_pass(instr, prev_instr, cur_dest_index);
+ fprintf(fp, "%03d:", instr_number);
+ printed |= print_acc(instr, prev_instr, cur_dest_index, fp);
+ printed |= print_mul(instr, prev_instr, cur_dest_index, fp);
+ printed |= print_complex(instr, prev_instr, cur_dest_index, fp);
+ printed |= print_pass(instr, prev_instr, cur_dest_index, fp);
if (instr->branch) {
printed = true;
/* The branch condition is taken from the current pass unit result */
- printf("\tbranch ^%d %03d\n", cur_dest_index + unit_pass,
+ fprintf(fp, "\tbranch ^%d %03d\n", cur_dest_index + unit_pass,
instr->branch_target + (instr->branch_target_lo ? 0 : 0x100));
}
if (instr->unknown_1 != 0) {
printed = true;
- printf("\tunknown_1 %u\n", instr->unknown_1);
+ fprintf(fp, "\tunknown_1 %u\n", instr->unknown_1);
}
if (!printed)
- printf("\tnop\n");
+ fprintf(fp, "\tnop\n");
}
void
-gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr)
+gpir_disassemble_program(gpir_codegen_instr *code, unsigned num_instr, FILE *fp)
{
- printf("=======disassembly:=======\n");
-
unsigned cur_dest_index = 0;
unsigned cur_instr = 0;
for (gpir_codegen_instr *instr = code; cur_instr < num_instr;
instr++, cur_instr++, cur_dest_index += num_units) {
- print_instr(instr, instr - 1, cur_instr, cur_dest_index);
+ print_instr(instr, instr - 1, cur_instr, cur_dest_index, fp);
}
}
diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h
index 63f74caa0ed..7065633aafe 100644
--- a/src/gallium/drivers/lima/ir/gp/gpir.h
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h
@@ -32,6 +32,7 @@
/* list of operations that a node can do. */
typedef enum {
+ gpir_op_unsupported = 0,
gpir_op_mov,
/* mul ops */
@@ -397,15 +398,9 @@ typedef struct gpir_compiler {
/* Find the gpir node for a given NIR SSA def. */
gpir_node **node_for_ssa;
- /* Find the gpir node for a given NIR register. */
- gpir_node **node_for_reg;
-
/* Find the gpir register for a given NIR SSA def. */
gpir_reg **reg_for_ssa;
- /* Find the gpir register for a given NIR register. */
- gpir_reg **reg_for_reg;
-
/* gpir block for NIR block. */
gpir_block **blocks;
diff --git a/src/gallium/drivers/lima/ir/gp/nir.c b/src/gallium/drivers/lima/ir/gp/nir.c
index 4b1479a68fc..4b02e60a8fc 100644
--- a/src/gallium/drivers/lima/ir/gp/nir.c
+++ b/src/gallium/drivers/lima/ir/gp/nir.c
@@ -38,18 +38,12 @@ gpir_reg *gpir_create_reg(gpir_compiler *comp)
return reg;
}
-static gpir_reg *reg_for_nir_reg(gpir_compiler *comp, nir_register *nir_reg)
-{
- unsigned index = nir_reg->index;
- gpir_reg *reg = comp->reg_for_reg[index];
- if (reg)
- return reg;
- reg = gpir_create_reg(comp);
- comp->reg_for_reg[index] = reg;
- return reg;
-}
-
-static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *ssa)
+/* Register the given gpir_node as providing the given NIR destination, so
+ * that gpir_node_find() will return it. Also insert any stores necessary if
+ * the destination will be used after the end of this basic block. The node
+ * must already be inserted.
+ */
+static void register_node_ssa(gpir_block *block, gpir_node *node, nir_def *ssa)
{
block->comp->node_for_ssa[ssa->index] = node;
snprintf(node->name, sizeof(node->name), "ssa%d", ssa->index);
@@ -59,7 +53,7 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s
*/
bool needs_register = false;
nir_foreach_use(use, ssa) {
- if (use->parent_instr->block != ssa->parent_instr->block) {
+ if (nir_src_parent_instr(use)->block != ssa->parent_instr->block) {
needs_register = true;
break;
}
@@ -67,7 +61,7 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s
if (!needs_register) {
nir_foreach_if_use(use, ssa) {
- if (nir_cf_node_prev(&use->parent_if->cf_node) !=
+ if (nir_cf_node_prev(&nir_src_parent_if(use)->cf_node) !=
&ssa->parent_instr->block->cf_node) {
needs_register = true;
break;
@@ -85,56 +79,36 @@ static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *s
}
}
-static void register_node_reg(gpir_block *block, gpir_node *node, nir_reg_dest *nir_reg)
+static void register_node_reg(gpir_block *block, gpir_node *node, int index)
{
- block->comp->node_for_reg[nir_reg->reg->index] = node;
+ block->comp->node_for_ssa[index] = node;
gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);
- snprintf(node->name, sizeof(node->name), "reg%d", nir_reg->reg->index);
+ snprintf(store->node.name, sizeof(node->name), "reg%d", index);
store->child = node;
- store->reg = reg_for_nir_reg(block->comp, nir_reg->reg);
+ store->reg = block->comp->reg_for_ssa[index];
gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);
list_addtail(&store->node.list, &block->node_list);
}
-/* Register the given gpir_node as providing the given NIR destination, so
- * that gpir_node_find() will return it. Also insert any stores necessary if
- * the destination will be used after the end of this basic block. The node
- * must already be inserted.
- */
-static void register_node(gpir_block *block, gpir_node *node, nir_dest *dest)
-{
- if (dest->is_ssa)
- register_node_ssa(block, node, &dest->ssa);
- else
- register_node_reg(block, node, &dest->reg);
-}
-
static gpir_node *gpir_node_find(gpir_block *block, nir_src *src,
int channel)
{
gpir_reg *reg = NULL;
gpir_node *pred = NULL;
- if (src->is_ssa) {
- if (src->ssa->num_components > 1) {
- for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) {
- if (block->comp->vector_ssa[i].ssa == src->ssa->index) {
- return block->comp->vector_ssa[i].nodes[channel];
- }
+ if (src->ssa->num_components > 1) {
+ for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) {
+ if (block->comp->vector_ssa[i].ssa == src->ssa->index) {
+ return block->comp->vector_ssa[i].nodes[channel];
}
- } else {
- gpir_node *pred = block->comp->node_for_ssa[src->ssa->index];
- if (pred->block == block)
- return pred;
- reg = block->comp->reg_for_ssa[src->ssa->index];
}
} else {
- pred = block->comp->node_for_reg[src->reg.reg->index];
+ gpir_node *pred = block->comp->node_for_ssa[src->ssa->index];
if (pred && pred->block == block)
return pred;
- reg = reg_for_nir_reg(block->comp, src->reg.reg);
+ reg = block->comp->reg_for_ssa[src->ssa->index];
}
assert(reg);
@@ -147,9 +121,6 @@ static gpir_node *gpir_node_find(gpir_block *block, nir_src *src,
}
static int nir_to_gpir_opcodes[nir_num_opcodes] = {
- /* not supported */
- [0 ... nir_last_opcode] = -1,
-
[nir_op_fmul] = gpir_op_mul,
[nir_op_fadd] = gpir_op_add,
[nir_op_fneg] = gpir_op_neg,
@@ -182,13 +153,13 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
if (instr->op == nir_op_mov) {
gpir_node *child = gpir_node_find(block, &instr->src[0].src,
instr->src[0].swizzle[0]);
- register_node(block, child, &instr->dest.dest);
+ register_node_ssa(block, child, &instr->def);
return true;
}
int op = nir_to_gpir_opcodes[instr->op];
- if (op < 0) {
+ if (op == gpir_op_unsupported) {
gpir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
return false;
}
@@ -203,7 +174,6 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
for (int i = 0; i < num_child; i++) {
nir_alu_src *src = instr->src + i;
- node->children_negate[i] = src->negate;
gpir_node *child = gpir_node_find(block, &src->src, src->swizzle[0]);
node->children[i] = child;
@@ -212,12 +182,12 @@ static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
}
list_addtail(&node->node.list, &block->node_list);
- register_node(block, &node->node, &instr->dest.dest);
+ register_node_ssa(block, &node->node, &instr->def);
return true;
}
-static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest,
+static gpir_node *gpir_create_load(gpir_block *block, nir_def *def,
int op, int index, int component)
{
gpir_load_node *load = gpir_node_create(block, op);
@@ -227,25 +197,24 @@ static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest,
load->index = index;
load->component = component;
list_addtail(&load->node.list, &block->node_list);
- register_node(block, &load->node, dest);
+ register_node_ssa(block, &load->node, def);
return &load->node;
}
-static bool gpir_create_vector_load(gpir_block *block, nir_dest *dest, int index)
+static bool gpir_create_vector_load(gpir_block *block, nir_def *def, int index)
{
- assert(dest->is_ssa);
assert(index < GPIR_VECTOR_SSA_NUM);
- block->comp->vector_ssa[index].ssa = dest->ssa.index;
+ block->comp->vector_ssa[index].ssa = def->index;
- for (int i = 0; i < dest->ssa.num_components; i++) {
- gpir_node *node = gpir_create_load(block, dest, gpir_op_load_uniform,
+ for (int i = 0; i < def->num_components; i++) {
+ gpir_node *node = gpir_create_load(block, def, gpir_op_load_uniform,
block->comp->constant_base + index, i);
if (!node)
return false;
block->comp->vector_ssa[index].nodes[i] = node;
- snprintf(node->name, sizeof(node->name), "ssa%d.%c", dest->ssa.index, "xyzw"[i]);
+ snprintf(node->name, sizeof(node->name), "ssa%d.%c", def->index, "xyzw"[i]);
}
return true;
@@ -256,24 +225,49 @@ static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni)
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
switch (instr->intrinsic) {
+ case nir_intrinsic_decl_reg:
+ {
+ gpir_reg *reg = gpir_create_reg(block->comp);
+ block->comp->reg_for_ssa[instr->def.index] = reg;
+ return true;
+ }
+ case nir_intrinsic_load_reg:
+ {
+ gpir_node *node = gpir_node_find(block, &instr->src[0], 0);
+ assert(node);
+ block->comp->node_for_ssa[instr->def.index] = node;
+ return true;
+ }
+ case nir_intrinsic_store_reg:
+ {
+ gpir_node *child = gpir_node_find(block, &instr->src[0], 0);
+ assert(child);
+ register_node_reg(block, child, instr->src[1].ssa->index);
+ return true;
+ }
case nir_intrinsic_load_input:
- return gpir_create_load(block, &instr->dest,
+ return gpir_create_load(block, &instr->def,
gpir_op_load_attribute,
nir_intrinsic_base(instr),
nir_intrinsic_component(instr)) != NULL;
case nir_intrinsic_load_uniform:
{
int offset = nir_intrinsic_base(instr);
+
+ if (!nir_src_is_const(instr->src[0])) {
+ gpir_error("indirect indexing for uniforms is not implemented\n");
+ return false;
+ }
offset += (int)nir_src_as_float(instr->src[0]);
- return gpir_create_load(block, &instr->dest,
+ return gpir_create_load(block, &instr->def,
gpir_op_load_uniform,
offset / 4, offset % 4) != NULL;
}
case nir_intrinsic_load_viewport_scale:
- return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_SCALE);
+ return gpir_create_vector_load(block, &instr->def, GPIR_VECTOR_SSA_VIEWPORT_SCALE);
case nir_intrinsic_load_viewport_offset:
- return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_OFFSET);
+ return gpir_create_vector_load(block, &instr->def, GPIR_VECTOR_SSA_VIEWPORT_OFFSET);
case nir_intrinsic_store_output:
{
gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying);
@@ -315,7 +309,7 @@ static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni)
static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni)
{
- gpir_error("nir_ssa_undef_instr is not supported\n");
+ gpir_error("nir_undef_instr is not supported\n");
return false;
}
@@ -335,7 +329,7 @@ static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) =
[nir_instr_type_alu] = gpir_emit_alu,
[nir_instr_type_intrinsic] = gpir_emit_intrinsic,
[nir_instr_type_load_const] = gpir_emit_load_const,
- [nir_instr_type_ssa_undef] = gpir_emit_ssa_undef,
+ [nir_instr_type_undef] = gpir_emit_ssa_undef,
[nir_instr_type_tex] = gpir_emit_tex,
[nir_instr_type_jump] = gpir_emit_jump,
};
@@ -401,7 +395,7 @@ static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *impl)
return true;
}
-static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
+static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_ssa)
{
gpir_compiler *comp = rzalloc(prog, gpir_compiler);
@@ -412,9 +406,7 @@ static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigne
comp->vector_ssa[i].ssa = -1;
comp->node_for_ssa = rzalloc_array(comp, gpir_node *, num_ssa);
- comp->node_for_reg = rzalloc_array(comp, gpir_node *, num_reg);
comp->reg_for_ssa = rzalloc_array(comp, gpir_reg *, num_ssa);
- comp->reg_for_reg = rzalloc_array(comp, gpir_reg *, num_reg);
comp->prog = prog;
return comp;
}
@@ -427,7 +419,7 @@ static int gpir_glsl_type_size(enum glsl_base_type type)
}
static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp,
- struct pipe_debug_callback *debug)
+ struct util_debug_callback *debug)
{
const struct shader_info *info = &nir->info;
char *shaderdb;
@@ -443,15 +435,15 @@ static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp,
if (lima_debug & LIMA_DEBUG_SHADERDB)
fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
- pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
+ util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
free(shaderdb);
}
bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir,
- struct pipe_debug_callback *debug)
+ struct util_debug_callback *debug)
{
nir_function_impl *func = nir_shader_get_entrypoint(nir);
- gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
+ gpir_compiler *comp = gpir_compiler_create(prog, func->ssa_alloc);
if (!comp)
return false;
diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c
index ef534e7e286..9372d72a8bf 100644
--- a/src/gallium/drivers/lima/ir/gp/node.c
+++ b/src/gallium/drivers/lima/ir/gp/node.c
@@ -28,6 +28,9 @@
#include "gpir.h"
const gpir_op_info gpir_op_infos[] = {
+ [gpir_op_unsupported] = {
+ .name = "unsupported",
+ },
[gpir_op_mov] = {
.name = "mov",
.slots = (int []) {
diff --git a/src/gallium/drivers/lima/ir/gp/optimize.c b/src/gallium/drivers/lima/ir/gp/optimize.c
index c95faec9c6d..e5896b7d41e 100644
--- a/src/gallium/drivers/lima/ir/gp/optimize.c
+++ b/src/gallium/drivers/lima/ir/gp/optimize.c
@@ -80,7 +80,7 @@ optimize_branches(gpir_compiler *comp)
if (block->list.prev == &comp->block_list)
continue;
- gpir_block *prev_block = LIST_ENTRY(gpir_block, block->list.prev, list);
+ gpir_block *prev_block = list_entry(block->list.prev, gpir_block, list);
if (list_is_empty(&prev_block->node_list))
continue;
@@ -109,7 +109,7 @@ optimize_branches(gpir_compiler *comp)
/* Delete the branch */
list_del(&node->list);
- block->successors[0] = LIST_ENTRY(gpir_block, block->list.next, list);
+ block->successors[0] = list_entry(block->list.next, gpir_block, list);
}
}
diff --git a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c
index 47cc6109e01..bcfe6fd89b0 100644
--- a/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c
@@ -31,6 +31,13 @@
* Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
*/
+static int cmp_float(const void *a, const void *b)
+{
+ const float *fa = (const float *) a;
+ const float *fb = (const float *) b;
+ return (*fa > *fb) - (*fa < *fb);
+}
+
static void schedule_calc_sched_info(gpir_node *node)
{
int n = 0;
@@ -68,15 +75,7 @@ static void schedule_calc_sched_info(gpir_node *node)
}
/* sort */
- for (i = 0; i < n - 1; i++) {
- for (int j = 0; j < n - i - 1; j++) {
- if (reg[j] > reg[j + 1]) {
- float tmp = reg[j + 1];
- reg[j + 1] = reg[j];
- reg[j] = tmp;
- }
- }
- }
+ qsort(reg, n, sizeof(reg[0]), cmp_float);
for (i = 0; i < n; i++) {
float pressure = reg[i] + n - (i + 1);
diff --git a/src/gallium/drivers/lima/ir/gp/regalloc.c b/src/gallium/drivers/lima/ir/gp/regalloc.c
index 8526d1e9e7d..eaab3e68182 100644
--- a/src/gallium/drivers/lima/ir/gp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/gp/regalloc.c
@@ -507,6 +507,11 @@ static void handle_reg_write(gpir_store_node *store,
static void handle_value_write(gpir_node *node,
struct value_regalloc_ctx *ctx)
{
+ /* TODO: why does an uninitialized node->value_reg
+ * sometimes end up here? */
+ if (node->value_reg < 0)
+ return;
+
ctx->last_written[node->value_reg] = node;
ctx->live[node->value_reg] = NULL;
}
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c
index 78128bd891e..c4700dce34d 100644
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -1297,9 +1297,9 @@ static bool try_node(sched_ctx *ctx)
* the list at all. We know better here, so we have to open-code
* list_for_each_entry() without the check in order to not assert.
*/
- for (gpir_node *node = LIST_ENTRY(gpir_node, ctx->ready_list.next, list);
+ for (gpir_node *node = list_entry(ctx->ready_list.next, gpir_node, list);
&node->list != &ctx->ready_list;
- node = LIST_ENTRY(gpir_node, node->list.next, list)) {
+ node = list_entry(node->list.next, gpir_node, list)) {
if (best_score != INT_MIN) {
if (node->sched.dist < best_node->sched.dist)
break;
diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h
index 41d363a5550..706804fb8d5 100644
--- a/src/gallium/drivers/lima/ir/lima_ir.h
+++ b/src/gallium/drivers/lima/ir/lima_ir.h
@@ -54,13 +54,13 @@ struct lima_fs_compiled_shader;
/* gpir interface */
bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir,
- struct pipe_debug_callback *debug);
+ struct util_debug_callback *debug);
/* ppir interface */
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
struct ra_regs *ra,
- struct pipe_debug_callback *debug);
+ struct util_debug_callback *debug);
struct ra_regs *ppir_regalloc_init(void *mem_ctx);
void lima_nir_lower_uniform_to_scalar(nir_shader *shader);
@@ -73,4 +73,6 @@ void lima_nir_duplicate_load_consts(nir_shader *shader);
void lima_nir_duplicate_load_inputs(nir_shader *shader);
void lima_nir_duplicate_load_uniforms(nir_shader *shader);
+bool lima_nir_lower_txp(nir_shader *shader);
+
#endif
diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
index 0eeab6b6ad0..efe59fd9b50 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
@@ -34,9 +34,9 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
nir_foreach_use_safe(use_src, &load->def) {
nir_load_const_instr *dupl;
- if (last_parent_instr != use_src->parent_instr) {
+ if (last_parent_instr != nir_src_parent_instr(use_src)) {
/* if ssa use, clone for the target block */
- b->cursor = nir_before_instr(use_src->parent_instr);
+ b->cursor = nir_before_instr(nir_src_parent_instr(use_src));
dupl = nir_load_const_instr_create(b->shader, load->def.num_components,
load->def.bit_size);
@@ -49,18 +49,19 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
dupl = last_dupl;
}
- nir_instr_rewrite_src(use_src->parent_instr, use_src, nir_src_for_ssa(&dupl->def));
- last_parent_instr = use_src->parent_instr;
+ nir_src_rewrite(use_src, &dupl->def);
+ last_parent_instr = nir_src_parent_instr(use_src);
last_dupl = dupl;
}
last_dupl = NULL;
- last_parent_instr = NULL;
+ nir_if *last_parent_if = NULL;
nir_foreach_if_use_safe(use_src, &load->def) {
nir_load_const_instr *dupl;
+ nir_if *nif = nir_src_parent_if(use_src);
- if (last_parent_instr != use_src->parent_instr) {
+ if (last_parent_if != nif) {
/* if 'if use', clone where it is */
b->cursor = nir_before_instr(&load->instr);
@@ -75,8 +76,8 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
dupl = last_dupl;
}
- nir_if_rewrite_condition(use_src->parent_if, nir_src_for_ssa(&dupl->def));
- last_parent_instr = use_src->parent_instr;
+ nir_src_rewrite(&nir_src_parent_if(use_src)->condition, &dupl->def);
+ last_parent_if = nif;
last_dupl = dupl;
}
@@ -87,8 +88,7 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
static void
lima_nir_duplicate_load_consts_impl(nir_shader *shader, nir_function_impl *impl)
{
- nir_builder builder;
- nir_builder_init(&builder, impl);
+ nir_builder builder = nir_builder_create(impl);
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
@@ -118,9 +118,7 @@ lima_nir_duplicate_load_consts_impl(nir_shader *shader, nir_function_impl *impl)
void
lima_nir_duplicate_load_consts(nir_shader *shader)
{
- nir_foreach_function(function, shader) {
- if (function->impl) {
- lima_nir_duplicate_load_consts_impl(shader, function->impl);
- }
+ nir_foreach_function_impl(impl, shader) {
+ lima_nir_duplicate_load_consts_impl(shader, impl);
}
}
diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c
index ecff28e525e..358352b3245 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_intrinsic.c
@@ -32,23 +32,19 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr,
nir_intrinsic_instr *last_dupl = NULL;
nir_instr *last_parent_instr = NULL;
- nir_foreach_use_safe(use_src, &itr->dest.ssa) {
+ nir_foreach_use_safe(use_src, &itr->def) {
nir_intrinsic_instr *dupl;
- if (last_parent_instr != use_src->parent_instr) {
+ if (last_parent_instr != nir_src_parent_instr(use_src)) {
/* if ssa use, clone for the target block */
- b->cursor = nir_before_instr(use_src->parent_instr);
+ b->cursor = nir_before_instr(nir_src_parent_instr(use_src));
dupl = nir_intrinsic_instr_create(b->shader, op);
dupl->num_components = itr->num_components;
memcpy(dupl->const_index, itr->const_index, sizeof(itr->const_index));
- dupl->src[0].is_ssa = itr->src[0].is_ssa;
- if (itr->src[0].is_ssa)
- dupl->src[0].ssa = itr->src[0].ssa;
- else
- dupl->src[0].reg = itr->src[0].reg;
+ dupl->src[0].ssa = itr->src[0].ssa;
- nir_ssa_dest_init(&dupl->instr, &dupl->dest,
- dupl->num_components, itr->dest.ssa.bit_size, NULL);
+ nir_def_init(&dupl->instr, &dupl->def, dupl->num_components,
+ itr->def.bit_size);
dupl->instr.pass_flags = 1;
nir_builder_instr_insert(b, &dupl->instr);
@@ -57,31 +53,28 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr,
dupl = last_dupl;
}
- nir_instr_rewrite_src(use_src->parent_instr, use_src, nir_src_for_ssa(&dupl->dest.ssa));
- last_parent_instr = use_src->parent_instr;
+ nir_src_rewrite(use_src, &dupl->def);
+ last_parent_instr = nir_src_parent_instr(use_src);
last_dupl = dupl;
}
last_dupl = NULL;
- last_parent_instr = NULL;
+ nir_if *last_parent_if = NULL;
- nir_foreach_if_use_safe(use_src, &itr->dest.ssa) {
+ nir_foreach_if_use_safe(use_src, &itr->def) {
nir_intrinsic_instr *dupl;
+ nir_if *nif = nir_src_parent_if(use_src);
- if (last_parent_instr != use_src->parent_instr) {
+ if (last_parent_if != nif) {
/* if 'if use', clone where it is */
b->cursor = nir_before_instr(&itr->instr);
dupl = nir_intrinsic_instr_create(b->shader, op);
dupl->num_components = itr->num_components;
memcpy(dupl->const_index, itr->const_index, sizeof(itr->const_index));
- dupl->src[0].is_ssa = itr->src[0].is_ssa;
- if (itr->src[0].is_ssa)
- dupl->src[0].ssa = itr->src[0].ssa;
- else
- dupl->src[0].reg = itr->src[0].reg;
+ dupl->src[0].ssa = itr->src[0].ssa;
- nir_ssa_dest_init(&dupl->instr, &dupl->dest,
- dupl->num_components, itr->dest.ssa.bit_size, NULL);
+ nir_def_init(&dupl->instr, &dupl->def, dupl->num_components,
+ itr->def.bit_size);
dupl->instr.pass_flags = 1;
nir_builder_instr_insert(b, &dupl->instr);
@@ -90,8 +83,8 @@ lima_nir_duplicate_intrinsic(nir_builder *b, nir_intrinsic_instr *itr,
dupl = last_dupl;
}
- nir_if_rewrite_condition(use_src->parent_if, nir_src_for_ssa(&dupl->dest.ssa));
- last_parent_instr = use_src->parent_instr;
+ nir_src_rewrite(&nir_src_parent_if(use_src)->condition, &dupl->def);
+ last_parent_if = nif;
last_dupl = dupl;
}
@@ -103,8 +96,7 @@ static void
lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl,
nir_intrinsic_op op)
{
- nir_builder builder;
- nir_builder_init(&builder, impl);
+ nir_builder builder = nir_builder_create(impl);
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
@@ -123,9 +115,6 @@ lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl,
if (itr->instr.pass_flags)
continue;
- if (!itr->dest.is_ssa)
- continue;
-
lima_nir_duplicate_intrinsic(&builder, itr, op);
}
}
@@ -140,10 +129,8 @@ lima_nir_duplicate_intrinsic_impl(nir_shader *shader, nir_function_impl *impl,
void
lima_nir_duplicate_load_uniforms(nir_shader *shader)
{
- nir_foreach_function(function, shader) {
- if (function->impl) {
- lima_nir_duplicate_intrinsic_impl(shader, function->impl, nir_intrinsic_load_uniform);
- }
+ nir_foreach_function_impl(impl, shader) {
+ lima_nir_duplicate_intrinsic_impl(shader, impl, nir_intrinsic_load_uniform);
}
}
@@ -153,9 +140,7 @@ lima_nir_duplicate_load_uniforms(nir_shader *shader)
void
lima_nir_duplicate_load_inputs(nir_shader *shader)
{
- nir_foreach_function(function, shader) {
- if (function->impl) {
- lima_nir_duplicate_intrinsic_impl(shader, function->impl, nir_intrinsic_load_input);
- }
+ nir_foreach_function_impl(impl, shader) {
+ lima_nir_duplicate_intrinsic_impl(shader, impl, nir_intrinsic_load_input);
}
}
diff --git a/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c b/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c
new file mode 100644
index 00000000000..8ee6a4b3528
--- /dev/null
+++ b/src/gallium/drivers/lima/ir/lima_nir_lower_txp.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "lima_ir.h"
+
+static nir_def *
+get_proj_index(nir_instr *coord_instr, nir_instr *proj_instr,
+ int coord_components, int *proj_idx)
+{
+ *proj_idx = -1;
+ if (coord_instr->type != nir_instr_type_alu ||
+ proj_instr->type != nir_instr_type_alu)
+ return NULL;
+
+ nir_alu_instr *coord_alu = nir_instr_as_alu(coord_instr);
+ nir_alu_instr *proj_alu = nir_instr_as_alu(proj_instr);
+
+ if (coord_alu->op != nir_op_mov ||
+ proj_alu->op != nir_op_mov)
+ return NULL;
+
+ nir_def *coord_src_ssa = coord_alu->src[0].src.ssa;
+ nir_def *proj_src_ssa = proj_alu->src[0].src.ssa;
+
+ if (coord_src_ssa != proj_src_ssa)
+ return NULL;
+
+ if (coord_src_ssa->parent_instr->type != nir_instr_type_intrinsic)
+ return NULL;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(coord_src_ssa->parent_instr);
+ if (intrin->intrinsic != nir_intrinsic_load_input)
+ return NULL;
+
+ if (intrin->def.num_components != 4)
+ return NULL;
+
+ /* The first coord_components coords must use the identity swizzle */
+ for (int i = 0; i < coord_components; i++) {
+ if (coord_alu->src[0].swizzle[i] != i)
+ return NULL;
+ }
+
+ *proj_idx = proj_alu->src[0].swizzle[0];
+
+ return coord_src_ssa;
+}
+
+static bool
+lima_nir_lower_txp_instr(nir_builder *b, nir_instr *instr,
+ UNUSED void *cb_data)
+{
+ if (instr->type != nir_instr_type_tex)
+ return false;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ int proj_idx = nir_tex_instr_src_index(tex, nir_tex_src_projector);
+ int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+
+ if (proj_idx < 0)
+ return false;
+
+ switch (tex->sampler_dim) {
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_1D:
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_3D:
+ break;
+ default:
+ return false;
+ }
+
+ b->cursor = nir_before_instr(&tex->instr);
+
+ /* Merge coords and projector into a single backend-specific source.
+ * This is easy if the texture2DProj argument is a vec3. It is trickier with
+ * a vec4, since NIR just drops the Z component that we need, so we have to
+ * step back and use the load_input SSA value instead of the mov as the
+ * source for the newly constructed vec4.
+ */
+ nir_def *proj_ssa = tex->src[proj_idx].src.ssa;
+ nir_def *coords_ssa = tex->src[coords_idx].src.ssa;
+
+ int proj_idx_in_vec = -1;
+ nir_def *load_input = get_proj_index(coords_ssa->parent_instr,
+ proj_ssa->parent_instr,
+ tex->coord_components,
+ &proj_idx_in_vec);
+ nir_def *combined;
+ if (load_input && proj_idx_in_vec == 3) {
+ unsigned xyzw[] = { 0, 1, 2, 3 };
+ combined = nir_swizzle(b, load_input, xyzw, 4);
+ tex->coord_components = 4;
+ } else if (load_input && proj_idx_in_vec == 2) {
+ unsigned xyz[] = { 0, 1, 2 };
+ combined = nir_swizzle(b, load_input, xyz, 3);
+ tex->coord_components = 3;
+ } else {
+ switch (tex->coord_components) {
+ default:
+ case 1:
+ /* We still need vec3 for 1D textures, so duplicate coordinate */
+ combined = nir_vec3(b,
+ nir_channel(b, coords_ssa, 0),
+ nir_channel(b, coords_ssa, 0),
+ nir_channel(b, proj_ssa, 0));
+ tex->coord_components = 3;
+ break;
+ case 2:
+ combined = nir_vec3(b,
+ nir_channel(b, coords_ssa, 0),
+ nir_channel(b, coords_ssa, 1),
+ nir_channel(b, proj_ssa, 0));
+ tex->coord_components = 3;
+ break;
+ case 3:
+ combined = nir_vec4(b,
+ nir_channel(b, coords_ssa, 0),
+ nir_channel(b, coords_ssa, 1),
+ nir_channel(b, coords_ssa, 2),
+ nir_channel(b, proj_ssa, 0));
+ tex->coord_components = 4;
+ }
+ }
+
+ nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_coord));
+ nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_projector));
+ nir_tex_instr_add_src(tex, nir_tex_src_backend1, combined);
+
+ return true;
+}
+
+bool
+lima_nir_lower_txp(nir_shader *shader)
+{
+ return nir_shader_instructions_pass(shader, lima_nir_lower_txp_instr,
+ nir_metadata_block_index |
+ nir_metadata_dominance,
+ NULL);
+}
diff --git a/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c
index f9d593f0417..4067746b574 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_lower_uniform_to_scalar.c
@@ -31,12 +31,12 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
- nir_ssa_def *loads[4];
+ nir_def *loads[4];
for (unsigned i = 0; i < intr->num_components; i++) {
nir_intrinsic_instr *chan_intr =
nir_intrinsic_instr_create(b->shader, intr->intrinsic);
- nir_ssa_dest_init(&chan_intr->instr, &chan_intr->dest,
- 1, intr->dest.ssa.bit_size, NULL);
+ nir_def_init(&chan_intr->instr, &chan_intr->def, 1,
+ intr->def.bit_size);
chan_intr->num_components = 1;
nir_intrinsic_set_base(chan_intr, nir_intrinsic_base(intr) * 4 + i);
@@ -48,10 +48,10 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
nir_builder_instr_insert(b, &chan_intr->instr);
- loads[i] = &chan_intr->dest.ssa;
+ loads[i] = &chan_intr->def;
}
- nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+ nir_def_rewrite_uses(&intr->def,
nir_vec(b, loads, intr->num_components));
nir_instr_remove(&intr->instr);
}
@@ -59,23 +59,20 @@ lower_load_uniform_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
void
lima_nir_lower_uniform_to_scalar(nir_shader *shader)
{
- nir_foreach_function(function, shader) {
- if (function->impl) {
- nir_builder b;
- nir_builder_init(&b, function->impl);
+ nir_foreach_function_impl(impl, shader) {
+ nir_builder b = nir_builder_create(impl);
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_load_uniform)
- continue;
+ if (intr->intrinsic != nir_intrinsic_load_uniform)
+ continue;
- lower_load_uniform_to_scalar(&b, intr);
- }
+ lower_load_uniform_to_scalar(&b, intr);
}
}
}
diff --git a/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c b/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c
index dbdb3a81c64..c061c88cdad 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_split_load_input.c
@@ -27,86 +27,63 @@
#include "lima_ir.h"
static bool
-lima_nir_split_load_input_block(nir_block *block, nir_builder *b)
+lima_nir_split_load_input_instr(nir_builder *b,
+ nir_instr *instr,
+ UNUSED void *cb_data)
{
- bool progress = false;
-
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_alu)
- continue;
-
- nir_alu_instr *alu = nir_instr_as_alu(instr);
- if (alu->op != nir_op_mov)
- continue;
-
- if (!alu->dest.dest.is_ssa)
- continue;
-
- if (!alu->src[0].src.is_ssa)
- continue;
-
- nir_ssa_def *ssa = alu->src[0].src.ssa;
- if (ssa->parent_instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ssa->parent_instr);
- if (intrin->intrinsic != nir_intrinsic_load_input)
- continue;
-
- uint8_t swizzle = alu->src[0].swizzle[0];
- int i;
-
- for (i = 1; i < nir_dest_num_components(alu->dest.dest); i++)
- if (alu->src[0].swizzle[i] != (swizzle + i))
- break;
-
- if (i != nir_dest_num_components(alu->dest.dest))
- continue;
-
- /* mali4xx can't access unaligned vec3, don't split load input */
- if (nir_dest_num_components(alu->dest.dest) == 3 && swizzle > 0)
- continue;
-
- b->cursor = nir_before_instr(&intrin->instr);
- nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create(
- b->shader,
- intrin->intrinsic);
- nir_ssa_dest_init(&new_intrin->instr, &new_intrin->dest,
- nir_dest_num_components(alu->dest.dest),
- ssa->bit_size,
- NULL);
- new_intrin->num_components = nir_dest_num_components(alu->dest.dest);
- nir_intrinsic_set_base(new_intrin, nir_intrinsic_base(intrin));
- nir_intrinsic_set_component(new_intrin, nir_intrinsic_component(intrin) + swizzle);
- nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));
-
- /* offset */
- nir_src_copy(&new_intrin->src[0], &intrin->src[0]);
-
- nir_builder_instr_insert(b, &new_intrin->instr);
- nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
- &new_intrin->dest.ssa);
- nir_instr_remove(&alu->instr);
- progress = true;
- }
-
- return progress;
-}
-
-static bool
-lima_nir_split_load_input_impl(nir_function_impl *impl)
-{
- bool progress = false;
- nir_builder builder;
- nir_builder_init(&builder, impl);
-
- nir_foreach_block(block, impl) {
- progress |= lima_nir_split_load_input_block(block, &builder);
- }
-
- nir_metadata_preserve(impl, nir_metadata_block_index |
- nir_metadata_dominance);
- return progress;
+ if (instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (alu->op != nir_op_mov)
+ return false;
+
+ nir_def *ssa = alu->src[0].src.ssa;
+ if (ssa->parent_instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ssa->parent_instr);
+ if (intrin->intrinsic != nir_intrinsic_load_input)
+ return false;
+
+ uint8_t swizzle = alu->src[0].swizzle[0];
+ int i;
+
+ for (i = 1; i < alu->def.num_components; i++)
+ if (alu->src[0].swizzle[i] != (swizzle + i))
+ break;
+
+ if (i != alu->def.num_components)
+ return false;
+
+ /* mali4xx can't access unaligned vec3, don't split load input */
+ if (alu->def.num_components == 3 && swizzle > 0)
+ return false;
+
+ /* mali4xx can't access unaligned vec2, don't split load input */
+ if (alu->def.num_components == 2 &&
+ swizzle != 0 && swizzle != 2)
+ return false;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_intrinsic_instr *new_intrin = nir_intrinsic_instr_create(
+ b->shader,
+ intrin->intrinsic);
+ nir_def_init(&new_intrin->instr, &new_intrin->def,
+ alu->def.num_components, ssa->bit_size);
+ new_intrin->num_components = alu->def.num_components;
+ nir_intrinsic_set_base(new_intrin, nir_intrinsic_base(intrin));
+ nir_intrinsic_set_component(new_intrin, nir_intrinsic_component(intrin) + swizzle);
+ nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));
+
+ /* offset */
+ new_intrin->src[0] = nir_src_for_ssa(intrin->src[0].ssa);
+
+ nir_builder_instr_insert(b, &new_intrin->instr);
+ nir_def_rewrite_uses(&alu->def,
+ &new_intrin->def);
+ nir_instr_remove(&alu->instr);
+ return true;
}
/* Replaces a single load of several packed varyings and number of movs with
@@ -115,13 +92,8 @@ lima_nir_split_load_input_impl(nir_function_impl *impl)
bool
lima_nir_split_load_input(nir_shader *shader)
{
- bool progress = false;
-
- nir_foreach_function(function, shader) {
- if (function->impl)
- progress |= lima_nir_split_load_input_impl(function->impl);
- }
-
- return progress;
+ return nir_shader_instructions_pass(shader, lima_nir_split_load_input_instr,
+ nir_metadata_block_index |
+ nir_metadata_dominance,
+ NULL);
}
-
diff --git a/src/gallium/drivers/lima/ir/lima_nir_split_loads.c b/src/gallium/drivers/lima/ir/lima_nir_split_loads.c
index 75707280c7b..fb6e4451753 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_split_loads.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_split_loads.c
@@ -36,59 +36,45 @@
* down but won't split it.
*/
-static nir_ssa_def *
+static nir_def *
clone_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
nir_intrinsic_instr *new_intrin =
nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
- assert(new_intrin->dest.is_ssa);
-
- unsigned num_srcs = nir_intrinsic_infos[new_intrin->intrinsic].num_srcs;
- for (unsigned i = 0; i < num_srcs; i++) {
- assert(new_intrin->src[i].is_ssa);
- }
-
nir_builder_instr_insert(b, &new_intrin->instr);
- return &new_intrin->dest.ssa;
+ return &new_intrin->def;
}
static bool
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
- if (!intrin->dest.is_ssa)
- return false;
-
if (intrin->intrinsic != nir_intrinsic_load_input &&
intrin->intrinsic != nir_intrinsic_load_uniform)
return false;
- if (!intrin->src[0].is_ssa)
- return false;
-
if (intrin->src[0].ssa->parent_instr->type == nir_instr_type_load_const)
return false;
struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
- nir_foreach_use_safe(src, &intrin->dest.ssa) {
+ nir_foreach_use_safe(src, &intrin->def) {
struct hash_entry *entry =
- _mesa_hash_table_search(visited_instrs, src->parent_instr);
- if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
- nir_ssa_def *def = entry->data;
- nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
+ _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src));
+ if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) {
+ nir_def *def = entry->data;
+ nir_src_rewrite(src, def);
continue;
}
- b->cursor = nir_before_src(src, false);
- nir_ssa_def *new = clone_intrinsic(b, intrin);
- nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
- _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
+ b->cursor = nir_before_src(src);
+ nir_def *new = clone_intrinsic(b, intrin);
+ nir_src_rewrite(src, new);
+ _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new);
}
- nir_foreach_if_use_safe(src, &intrin->dest.ssa) {
- b->cursor = nir_before_src(src, true);
- nir_if_rewrite_condition(src->parent_if,
- nir_src_for_ssa(clone_intrinsic(b, intrin)));
+ nir_foreach_if_use_safe(src, &intrin->def) {
+ b->cursor = nir_before_src(src);
+ nir_src_rewrite(&nir_src_parent_if(src)->condition, clone_intrinsic(b, intrin));
}
nir_instr_remove(&intrin->instr);
@@ -103,18 +89,18 @@ replace_load_const(nir_builder *b, nir_load_const_instr *load_const)
nir_foreach_use_safe(src, &load_const->def) {
struct hash_entry *entry =
- _mesa_hash_table_search(visited_instrs, src->parent_instr);
- if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
- nir_ssa_def *def = entry->data;
- nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
+ _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src));
+ if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) {
+ nir_def *def = entry->data;
+ nir_src_rewrite(src, def);
continue;
}
- b->cursor = nir_before_src(src, false);
- nir_ssa_def *new = nir_build_imm(b, load_const->def.num_components,
+ b->cursor = nir_before_src(src);
+ nir_def *new = nir_build_imm(b, load_const->def.num_components,
load_const->def.bit_size,
load_const->value);
- nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
- _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
+ nir_src_rewrite(src, new);
+ _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new);
}
nir_instr_remove(&load_const->instr);
@@ -126,19 +112,16 @@ lima_nir_split_loads(nir_shader *shader)
{
bool progress = false;
- nir_foreach_function(function, shader) {
- if (function->impl) {
- nir_builder b;
- nir_builder_init(&b, function->impl);
-
- nir_foreach_block_reverse(block, function->impl) {
- nir_foreach_instr_reverse_safe(instr, block) {
- if (instr->type == nir_instr_type_load_const) {
- replace_load_const(&b, nir_instr_as_load_const(instr));
- progress = true;
- } else if (instr->type == nir_instr_type_intrinsic) {
- progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr));
- }
+ nir_foreach_function_impl(impl, shader) {
+ nir_builder b = nir_builder_create(impl);
+
+ nir_foreach_block_reverse(block, impl) {
+ nir_foreach_instr_reverse_safe(instr, block) {
+ if (instr->type == nir_instr_type_load_const) {
+ replace_load_const(&b, nir_instr_as_load_const(instr));
+ progress = true;
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr));
}
}
}
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c
index 47ceb183677..b043bd46bd8 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -91,8 +91,20 @@ static void ppir_codegen_encode_varying(ppir_node *node, void *code)
f->imm.perspective = 1;
break;
case ppir_op_load_coords:
- /* num_components == 3 implies cubemap as we don't support 3D textures */
- f->imm.source_type = num_components == 3 ? 2 : 0;
+ if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+ f->imm.source_type = 2;
+
+ switch (load->perspective) {
+ case ppir_perspective_none:
+ f->imm.perspective = 0;
+ break;
+ case ppir_perspective_z:
+ f->imm.perspective = 2;
+ break;
+ case ppir_perspective_w:
+ f->imm.perspective = 3;
+ break;
+ }
break;
default:
break;
@@ -103,12 +115,22 @@ static void ppir_codegen_encode_varying(ppir_node *node, void *code)
f->reg.mask = dest->write_mask << (index & 0x3);
if (load->num_src) {
- /* num_components == 3 implies cubemap as we don't support 3D textures */
- if (num_components == 3) {
+ if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
f->reg.source_type = 2;
f->reg.perspective = 1;
} else {
f->reg.source_type = 1;
+ switch (load->perspective) {
+ case ppir_perspective_none:
+ f->reg.perspective = 0;
+ break;
+ case ppir_perspective_z:
+ f->reg.perspective = 2;
+ break;
+ case ppir_perspective_w:
+ f->reg.perspective = 3;
+ break;
+ }
}
ppir_src *src = &load->src;
index = ppir_target_get_src_reg_index(src);
@@ -134,9 +156,10 @@ static void ppir_codegen_encode_texld(ppir_node *node, void *code)
switch (ldtex->sampler_dim) {
case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_EXTERNAL:
- f->type = ppir_codegen_sampler_type_2d;
+ f->type = ppir_codegen_sampler_type_generic;
break;
case GLSL_SAMPLER_DIM_CUBE:
f->type = ppir_codegen_sampler_type_cube;
@@ -175,6 +198,22 @@ static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
}
}
+static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod)
+{
+ switch (outmod) {
+ case ppir_outmod_none:
+ return ppir_codegen_outmod_none;
+ case ppir_outmod_clamp_fraction:
+ return ppir_codegen_outmod_clamp_fraction;
+ case ppir_outmod_clamp_positive:
+ return ppir_codegen_outmod_clamp_positive;
+ case ppir_outmod_round:
+ return ppir_codegen_outmod_round;
+ default:
+ unreachable("invalid ppir_outmod");
+ }
+}
+
static unsigned shift_to_op(int shift)
{
assert(shift >= -3 && shift <= 3);
@@ -194,7 +233,7 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
f->dest = index >> 2;
f->mask = dest->write_mask << dest_shift;
}
- f->dest_modifier = dest->modifier;
+ f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
switch (node->op) {
case ppir_op_mul:
@@ -267,7 +306,7 @@ static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
f->output_en = true;
}
- f->dest_modifier = dest->modifier;
+ f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
switch (node->op) {
case ppir_op_mul:
@@ -333,7 +372,7 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
int dest_shift = index & 0x3;
f->dest = index >> 2;
f->mask = dest->write_mask << dest_shift;
- f->dest_modifier = dest->modifier;
+ f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
switch (node->op) {
case ppir_op_add:
@@ -423,7 +462,7 @@ static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
f->output_en = true;
- f->dest_modifier = dest->modifier;
+ f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
switch (node->op) {
case ppir_op_add:
@@ -509,7 +548,7 @@ static void ppir_codegen_encode_combine(ppir_node *node, void *code)
int dest_component = ffs(dest->write_mask) - 1;
assert(dest_component >= 0);
f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
- f->scalar.dest_modifier = dest->modifier;
+ f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier);
ppir_src *src = alu->src;
f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
@@ -620,7 +659,7 @@ static void ppir_codegen_encode_branch(ppir_node *node, void *code)
while (list_is_empty(&target->instr_list)) {
if (!target->list.next)
break;
- target = LIST_ENTRY(ppir_block, target->list.next, list);
+ target = list_entry(target->list.next, ppir_block, list);
}
assert(!list_is_empty(&target->instr_list));
@@ -674,13 +713,13 @@ static int get_instr_encode_size(ppir_instr *instr)
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
- int off1 = dst_offset & 0x1f;
- uint32_t *cpy_dst = dst, *cpy_src = src;
+ unsigned char *cpy_dst = dst, *cpy_src = src;
+ int off1 = dst_offset & 0x07;
- cpy_dst += (dst_offset >> 5);
+ cpy_dst += (dst_offset >> 3);
if (off1) {
- int off2 = 32 - off1;
+ int off2 = 0x08 - off1;
int cpy_size = 0;
while (1) {
*cpy_dst |= *cpy_src << off1;
@@ -750,7 +789,7 @@ static int encode_instr(ppir_instr *instr, void *code, void *last_code)
size = align_to_word(size) + 1;
ctrl->count = size;
- if (instr->is_end)
+ if (instr->stop)
ctrl->stop = true;
if (last_code) {
@@ -778,7 +817,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp)
printf("%08x ", prog[i]);
}
printf("\n");
- ppir_disassemble_instr(prog, offset);
+ ppir_disassemble_instr(prog, offset, stdout);
prog += n;
offset += n;
}
@@ -795,6 +834,11 @@ bool ppir_codegen_prog(ppir_compiler *comp)
instr->encode_size = get_instr_encode_size(instr);
size += instr->encode_size;
}
+ /* Set the stop flag on the last instruction if the block has its stop flag set */
+ if (block->stop) {
+ ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list);
+ instr->stop = true;
+ }
}
uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.h b/src/gallium/drivers/lima/ir/pp/codegen.h
index bf2541f1a8f..dbd0c8f654b 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.h
+++ b/src/gallium/drivers/lima/ir/pp/codegen.h
@@ -104,8 +104,8 @@ typedef union __attribute__((__packed__)) {
} ppir_codegen_field_varying;
typedef enum {
- ppir_codegen_sampler_type_2d = 0x00,
- ppir_codegen_sampler_type_cube = 0x1F,
+ ppir_codegen_sampler_type_generic = 0x00,
+ ppir_codegen_sampler_type_cube = 0x1F,
} ppir_codegen_sampler_type;
typedef struct __attribute__((__packed__)) {
@@ -355,6 +355,6 @@ typedef union __attribute__((__packed__)) {
} discard;
} ppir_codegen_field_branch;
-void ppir_disassemble_instr(uint32_t *instr, unsigned offset);
+void ppir_disassemble_instr(uint32_t *instr, unsigned offset, FILE *fp);
#endif
diff --git a/src/gallium/drivers/lima/ir/pp/disasm.c b/src/gallium/drivers/lima/ir/pp/disasm.c
index 50aa4cbb852..81d4016a090 100644
--- a/src/gallium/drivers/lima/ir/pp/disasm.c
+++ b/src/gallium/drivers/lima/ir/pp/disasm.c
@@ -35,51 +35,51 @@ typedef struct {
} asm_op;
static void
-print_swizzle(uint8_t swizzle)
+print_swizzle(uint8_t swizzle, FILE *fp)
{
if (swizzle == 0xE4)
return;
- printf(".");
+ fprintf(fp, ".");
for (unsigned i = 0; i < 4; i++, swizzle >>= 2)
- printf("%c", "xyzw"[swizzle & 3]);
+ fprintf(fp, "%c", "xyzw"[swizzle & 3]);
}
static void
-print_mask(uint8_t mask)
+print_mask(uint8_t mask, FILE *fp)
{
if (mask == 0xF)
return;
- printf(".");
- if (mask & 1) printf("x");
- if (mask & 2) printf("y");
- if (mask & 4) printf("z");
- if (mask & 8) printf("w");
+ fprintf(fp, ".");
+ if (mask & 1) fprintf(fp, "x");
+ if (mask & 2) fprintf(fp, "y");
+ if (mask & 4) fprintf(fp, "z");
+ if (mask & 8) fprintf(fp, "w");
}
static void
-print_reg(ppir_codegen_vec4_reg reg, const char *special)
+print_reg(ppir_codegen_vec4_reg reg, const char *special, FILE *fp)
{
if (special) {
- printf("%s", special);
+ fprintf(fp, "%s", special);
} else {
switch (reg)
{
case ppir_codegen_vec4_reg_constant0:
- printf("^const0");
+ fprintf(fp, "^const0");
break;
case ppir_codegen_vec4_reg_constant1:
- printf("^const1");
+ fprintf(fp, "^const1");
break;
case ppir_codegen_vec4_reg_texture:
- printf("^texture");
+ fprintf(fp, "^texture");
break;
case ppir_codegen_vec4_reg_uniform:
- printf("^uniform");
+ fprintf(fp, "^uniform");
break;
default:
- printf("$%u", reg);
+ fprintf(fp, "$%u", reg);
break;
}
}
@@ -87,75 +87,75 @@ print_reg(ppir_codegen_vec4_reg reg, const char *special)
static void
print_vector_source(ppir_codegen_vec4_reg reg, const char *special,
- uint8_t swizzle, bool abs, bool neg)
+ uint8_t swizzle, bool abs, bool neg, FILE *fp)
{
if (neg)
- printf("-");
+ fprintf(fp, "-");
if (abs)
- printf("abs(");
+ fprintf(fp, "abs(");
- print_reg(reg, special);
- print_swizzle(swizzle);
+ print_reg(reg, special, fp);
+ print_swizzle(swizzle, fp);
if (abs)
- printf(")");
+ fprintf(fp, ")");
}
static void
-print_source_scalar(unsigned reg, const char *special, bool abs, bool neg)
+print_source_scalar(unsigned reg, const char *special, bool abs, bool neg, FILE *fp)
{
if (neg)
- printf("-");
+ fprintf(fp, "-");
if (abs)
- printf("abs(");
+ fprintf(fp, "abs(");
- print_reg(reg >> 2, special);
+ print_reg(reg >> 2, special, fp);
if (!special)
- printf(".%c", "xyzw"[reg & 3]);
+ fprintf(fp, ".%c", "xyzw"[reg & 3]);
if (abs)
- printf(")");
+ fprintf(fp, ")");
}
static void
-print_varying_source(ppir_codegen_field_varying *varying)
+print_varying_source(ppir_codegen_field_varying *varying, FILE *fp)
{
switch (varying->imm.alignment) {
case 0:
- printf("%u.%c", varying->imm.index >> 2,
+ fprintf(fp, "%u.%c", varying->imm.index >> 2,
"xyzw"[varying->imm.index & 3]);
break;
case 1: {
const char *c[2] = {"xy", "zw"};
- printf("%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]);
+ fprintf(fp, "%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]);
break;
}
default:
- printf("%u", varying->imm.index);
+ fprintf(fp, "%u", varying->imm.index);
break;
}
if (varying->imm.offset_vector != 15) {
unsigned reg = (varying->imm.offset_vector << 2) +
varying->imm.offset_scalar;
- printf("+");
- print_source_scalar(reg, NULL, false, false);
+ fprintf(fp, "+");
+ print_source_scalar(reg, NULL, false, false, fp);
}
}
static void
-print_outmod(ppir_codegen_outmod modifier)
+print_outmod(ppir_codegen_outmod modifier, FILE *fp)
{
switch (modifier)
{
case ppir_codegen_outmod_clamp_fraction:
- printf(".sat");
+ fprintf(fp, ".sat");
break;
case ppir_codegen_outmod_clamp_positive:
- printf(".pos");
+ fprintf(fp, ".pos");
break;
case ppir_codegen_outmod_round:
- printf(".int");
+ fprintf(fp, ".int");
break;
default:
break;
@@ -163,190 +163,189 @@ print_outmod(ppir_codegen_outmod modifier)
}
static void
-print_dest_scalar(unsigned reg)
+print_dest_scalar(unsigned reg, FILE *fp)
{
- printf("$%u", reg >> 2);
- printf(".%c ", "xyzw"[reg & 3]);
+ fprintf(fp, "$%u", reg >> 2);
+ fprintf(fp, ".%c ", "xyzw"[reg & 3]);
}
static void
-print_const(unsigned const_num, uint16_t *val)
+print_const(unsigned const_num, uint16_t *val, FILE *fp)
{
- printf("const%u", const_num);
+ fprintf(fp, "const%u", const_num);
for (unsigned i = 0; i < 4; i++)
- printf(" %f", _mesa_half_to_float(val[i]));
+ fprintf(fp, " %f", _mesa_half_to_float(val[i]));
}
static void
-print_const0(void *code, unsigned offset)
+print_const0(void *code, unsigned offset, FILE *fp)
{
(void) offset;
- print_const(0, code);
+ print_const(0, code, fp);
}
static void
-print_const1(void *code, unsigned offset)
+print_const1(void *code, unsigned offset, FILE *fp)
{
(void) offset;
- print_const(1, code);
+ print_const(1, code, fp);
}
static void
-print_varying(void *code, unsigned offset)
+print_varying(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_varying *varying = code;
- printf("load");
+ fprintf(fp, "load");
bool perspective = varying->imm.source_type < 2 && varying->imm.perspective;
if (perspective)
{
- printf(".perspective");
+ fprintf(fp, ".perspective");
switch (varying->imm.perspective)
{
case 2:
- printf(".z");
+ fprintf(fp, ".z");
break;
case 3:
- printf(".w");
+ fprintf(fp, ".w");
break;
default:
- printf(".unknown");
+ fprintf(fp, ".unknown");
break;
}
}
- printf(".v ");
+ fprintf(fp, ".v ");
switch (varying->imm.dest)
{
case ppir_codegen_vec4_reg_discard:
- printf("^discard");
+ fprintf(fp, "^discard");
break;
default:
- printf("$%u", varying->imm.dest);
+ fprintf(fp, "$%u", varying->imm.dest);
break;
}
- print_mask(varying->imm.mask);
- printf(" ");
+ print_mask(varying->imm.mask, fp);
+ fprintf(fp, " ");
switch (varying->imm.source_type) {
case 1:
print_vector_source(varying->reg.source, NULL, varying->reg.swizzle,
- varying->reg.absolute, varying->reg.negate);
+ varying->reg.absolute, varying->reg.negate, fp);
break;
case 2:
switch (varying->imm.perspective) {
case 0:
- printf("cube(");
- print_varying_source(varying);
- printf(")");
+ fprintf(fp, "cube(");
+ print_varying_source(varying, fp);
+ fprintf(fp, ")");
break;
case 1:
- printf("cube(");
+ fprintf(fp, "cube(");
print_vector_source(varying->reg.source, NULL, varying->reg.swizzle,
- varying->reg.absolute, varying->reg.negate);
- printf(")");
+ varying->reg.absolute, varying->reg.negate, fp);
+ fprintf(fp, ")");
break;
case 2:
- printf("normalize(");
+ fprintf(fp, "normalize(");
print_vector_source(varying->reg.source, NULL, varying->reg.swizzle,
- varying->reg.absolute, varying->reg.negate);
- printf(")");
+ varying->reg.absolute, varying->reg.negate, fp);
+ fprintf(fp, ")");
break;
default:
- printf("gl_FragCoord");
+ fprintf(fp, "gl_FragCoord");
break;
}
break;
case 3:
if (varying->imm.perspective)
- printf("gl_FrontFacing");
+ fprintf(fp, "gl_FrontFacing");
else
- printf("gl_PointCoord");
+ fprintf(fp, "gl_PointCoord");
break;
default:
- print_varying_source(varying);
+ print_varying_source(varying, fp);
break;
}
}
static void
-print_sampler(void *code, unsigned offset)
+print_sampler(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_sampler *sampler = code;
- printf("texld");
+ fprintf(fp, "texld");
if (sampler->lod_bias_en)
- printf(".b");
+ fprintf(fp, ".b");
switch (sampler->type) {
- case ppir_codegen_sampler_type_2d:
- printf(".2d");
+ case ppir_codegen_sampler_type_generic:
break;
case ppir_codegen_sampler_type_cube:
- printf(".cube");
+ fprintf(fp, ".cube");
break;
default:
- printf("_t%u", sampler->type);
+ fprintf(fp, "_t%u", sampler->type);
break;
}
- printf(" %u", sampler->index);
+ fprintf(fp, " %u", sampler->index);
if (sampler->offset_en)
{
- printf("+");
- print_source_scalar(sampler->index_offset, NULL, false, false);
+ fprintf(fp, "+");
+ print_source_scalar(sampler->index_offset, NULL, false, false, fp);
}
if (sampler->lod_bias_en)
{
- printf(" ");
- print_source_scalar(sampler->lod_bias, NULL, false, false);
+ fprintf(fp, " ");
+ print_source_scalar(sampler->lod_bias, NULL, false, false, fp);
}
}
static void
-print_uniform(void *code, unsigned offset)
+print_uniform(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_uniform *uniform = code;
- printf("load.");
+ fprintf(fp, "load.");
switch (uniform->source) {
case ppir_codegen_uniform_src_uniform:
- printf("u");
+ fprintf(fp, "u");
break;
case ppir_codegen_uniform_src_temporary:
- printf("t");
+ fprintf(fp, "t");
break;
default:
- printf(".u%u", uniform->source);
+ fprintf(fp, ".u%u", uniform->source);
break;
}
int16_t index = uniform->index;
switch (uniform->alignment) {
case 2:
- printf(" %d", index);
+ fprintf(fp, " %d", index);
break;
case 1:
- printf(" %d.%s", index / 2, (index & 1) ? "zw" : "xy");
+ fprintf(fp, " %d.%s", index / 2, (index & 1) ? "zw" : "xy");
break;
default:
- printf(" %d.%c", index / 4, "xyzw"[index & 3]);
+ fprintf(fp, " %d.%c", index / 4, "xyzw"[index & 3]);
break;
}
if (uniform->offset_en) {
- printf("+");
- print_source_scalar(uniform->offset_reg, NULL, false, false);
+ fprintf(fp, "+");
+ print_source_scalar(uniform->offset_reg, NULL, false, false, fp);
}
}
@@ -377,7 +376,7 @@ static const asm_op vec4_mul_ops[] = {
#undef CASE
static void
-print_vec4_mul(void *code, unsigned offset)
+print_vec4_mul(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_vec4_mul *vec4_mul = code;
@@ -385,34 +384,34 @@ print_vec4_mul(void *code, unsigned offset)
asm_op op = vec4_mul_ops[vec4_mul->op];
if (op.name)
- printf("%s", op.name);
+ fprintf(fp, "%s", op.name);
else
- printf("op%u", vec4_mul->op);
- print_outmod(vec4_mul->dest_modifier);
- printf(".v0 ");
+ fprintf(fp, "op%u", vec4_mul->op);
+ print_outmod(vec4_mul->dest_modifier, fp);
+ fprintf(fp, ".v0 ");
if (vec4_mul->mask) {
- printf("$%u", vec4_mul->dest);
- print_mask(vec4_mul->mask);
- printf(" ");
+ fprintf(fp, "$%u", vec4_mul->dest);
+ print_mask(vec4_mul->mask, fp);
+ fprintf(fp, " ");
}
print_vector_source(vec4_mul->arg0_source, NULL,
vec4_mul->arg0_swizzle,
vec4_mul->arg0_absolute,
- vec4_mul->arg0_negate);
+ vec4_mul->arg0_negate, fp);
if (vec4_mul->op < 8 && vec4_mul->op != 0) {
- printf("<<%u", vec4_mul->op);
+ fprintf(fp, "<<%u", vec4_mul->op);
}
- printf(" ");
+ fprintf(fp, " ");
if (op.srcs > 1) {
print_vector_source(vec4_mul->arg1_source, NULL,
vec4_mul->arg1_swizzle,
vec4_mul->arg1_absolute,
- vec4_mul->arg1_negate);
+ vec4_mul->arg1_negate, fp);
}
}
@@ -444,7 +443,7 @@ static const asm_op vec4_acc_ops[] = {
#undef CASE
static void
-print_vec4_acc(void *code, unsigned offset)
+print_vec4_acc(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_vec4_acc *vec4_acc = code;
@@ -452,29 +451,29 @@ print_vec4_acc(void *code, unsigned offset)
asm_op op = vec4_acc_ops[vec4_acc->op];
if (op.name)
- printf("%s", op.name);
+ fprintf(fp, "%s", op.name);
else
- printf("op%u", vec4_acc->op);
- print_outmod(vec4_acc->dest_modifier);
- printf(".v1 ");
+ fprintf(fp, "op%u", vec4_acc->op);
+ print_outmod(vec4_acc->dest_modifier, fp);
+ fprintf(fp, ".v1 ");
if (vec4_acc->mask) {
- printf("$%u", vec4_acc->dest);
- print_mask(vec4_acc->mask);
- printf(" ");
+ fprintf(fp, "$%u", vec4_acc->dest);
+ print_mask(vec4_acc->mask, fp);
+ fprintf(fp, " ");
}
print_vector_source(vec4_acc->arg0_source, vec4_acc->mul_in ? "^v0" : NULL,
vec4_acc->arg0_swizzle,
vec4_acc->arg0_absolute,
- vec4_acc->arg0_negate);
+ vec4_acc->arg0_negate, fp);
if (op.srcs > 1) {
- printf(" ");
+ fprintf(fp, " ");
print_vector_source(vec4_acc->arg1_source, NULL,
vec4_acc->arg1_swizzle,
vec4_acc->arg1_absolute,
- vec4_acc->arg1_negate);
+ vec4_acc->arg1_negate, fp);
}
}
@@ -505,7 +504,7 @@ static const asm_op float_mul_ops[] = {
#undef CASE
static void
-print_float_mul(void *code, unsigned offset)
+print_float_mul(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_float_mul *float_mul = code;
@@ -513,29 +512,29 @@ print_float_mul(void *code, unsigned offset)
asm_op op = float_mul_ops[float_mul->op];
if (op.name)
- printf("%s", op.name);
+ fprintf(fp, "%s", op.name);
else
- printf("op%u", float_mul->op);
- print_outmod(float_mul->dest_modifier);
- printf(".s0 ");
+ fprintf(fp, "op%u", float_mul->op);
+ print_outmod(float_mul->dest_modifier, fp);
+ fprintf(fp, ".s0 ");
if (float_mul->output_en)
- print_dest_scalar(float_mul->dest);
+ print_dest_scalar(float_mul->dest, fp);
print_source_scalar(float_mul->arg0_source, NULL,
float_mul->arg0_absolute,
- float_mul->arg0_negate);
+ float_mul->arg0_negate, fp);
if (float_mul->op < 8 && float_mul->op != 0) {
- printf("<<%u", float_mul->op);
+ fprintf(fp, "<<%u", float_mul->op);
}
if (op.srcs > 1) {
- printf(" ");
+ fprintf(fp, " ");
print_source_scalar(float_mul->arg1_source, NULL,
float_mul->arg1_absolute,
- float_mul->arg1_negate);
+ float_mul->arg1_negate, fp);
}
}
@@ -565,7 +564,7 @@ static const asm_op float_acc_ops[] = {
#undef CASE
static void
-print_float_acc(void *code, unsigned offset)
+print_float_acc(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_float_acc *float_acc = code;
@@ -573,24 +572,24 @@ print_float_acc(void *code, unsigned offset)
asm_op op = float_acc_ops[float_acc->op];
if (op.name)
- printf("%s", op.name);
+ fprintf(fp, "%s", op.name);
else
- printf("op%u", float_acc->op);
- print_outmod(float_acc->dest_modifier);
- printf(".s1 ");
+ fprintf(fp, "op%u", float_acc->op);
+ print_outmod(float_acc->dest_modifier, fp);
+ fprintf(fp, ".s1 ");
if (float_acc->output_en)
- print_dest_scalar(float_acc->dest);
+ print_dest_scalar(float_acc->dest, fp);
print_source_scalar(float_acc->arg0_source, float_acc->mul_in ? "^s0" : NULL,
float_acc->arg0_absolute,
- float_acc->arg0_negate);
+ float_acc->arg0_negate, fp);
if (op.srcs > 1) {
- printf(" ");
+ fprintf(fp, " ");
print_source_scalar(float_acc->arg1_source, NULL,
float_acc->arg1_absolute,
- float_acc->arg1_negate);
+ float_acc->arg1_negate, fp);
}
}
@@ -616,7 +615,7 @@ static const asm_op combine_ops[] = {
#undef CASE
static void
-print_combine(void *code, unsigned offset)
+print_combine(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_combine *combine = code;
@@ -626,105 +625,104 @@ print_combine(void *code, unsigned offset)
/* This particular combination can only be valid for scalar * vector
* multiplies, and the opcode field is reused for something else.
*/
- printf("mul");
+ fprintf(fp, "mul");
} else {
asm_op op = combine_ops[combine->scalar.op];
if (op.name)
- printf("%s", op.name);
+ fprintf(fp, "%s", op.name);
else
- printf("op%u", combine->scalar.op);
+ fprintf(fp, "op%u", combine->scalar.op);
}
if (!combine->scalar.dest_vec)
- print_outmod(combine->scalar.dest_modifier);
- printf(".s2 ");
+ print_outmod(combine->scalar.dest_modifier, fp);
+ fprintf(fp, ".s2 ");
if (combine->scalar.dest_vec) {
- printf("$%u", combine->vector.dest);
- print_mask(combine->vector.mask);
+ fprintf(fp, "$%u", combine->vector.dest);
+ print_mask(combine->vector.mask, fp);
} else {
- print_dest_scalar(combine->scalar.dest);
+ print_dest_scalar(combine->scalar.dest, fp);
}
- printf(" ");
+ fprintf(fp, " ");
print_source_scalar(combine->scalar.arg0_src, NULL,
combine->scalar.arg0_absolute,
- combine->scalar.arg0_negate);
- printf(" ");
+ combine->scalar.arg0_negate, fp);
+ fprintf(fp, " ");
if (combine->scalar.arg1_en) {
if (combine->scalar.dest_vec) {
print_vector_source(combine->vector.arg1_source, NULL,
combine->vector.arg1_swizzle,
- false, false);
+ false, false, fp);
} else {
print_source_scalar(combine->scalar.arg1_src, NULL,
combine->scalar.arg1_absolute,
- combine->scalar.arg1_negate);
+ combine->scalar.arg1_negate, fp);
}
}
}
static void
-print_temp_write(void *code, unsigned offset)
+print_temp_write(void *code, unsigned offset, FILE *fp)
{
(void) offset;
ppir_codegen_field_temp_write *temp_write = code;
if (temp_write->fb_read.unknown_0 == 0x7) {
if (temp_write->fb_read.source)
- printf("fb_color");
+ fprintf(fp, "fb_color");
else
- printf("fb_depth");
- printf(" $%u", temp_write->fb_read.dest);
+ fprintf(fp, "fb_depth");
+ fprintf(fp, " $%u", temp_write->fb_read.dest);
return;
}
- printf("store.t");
+ fprintf(fp, "store.t");
int16_t index = temp_write->temp_write.index;
switch (temp_write->temp_write.alignment) {
case 2:
- printf(" %d", index);
+ fprintf(fp, " %d", index);
break;
case 1:
- printf(" %d.%s", index / 2, (index & 1) ? "zw" : "xy");
+ fprintf(fp, " %d.%s", index / 2, (index & 1) ? "zw" : "xy");
break;
default:
- printf(" %d.%c", index / 4, "xyzw"[index & 3]);
+ fprintf(fp, " %d.%c", index / 4, "xyzw"[index & 3]);
break;
}
if (temp_write->temp_write.offset_en) {
- printf("+");
+ fprintf(fp, "+");
print_source_scalar(temp_write->temp_write.offset_reg,
- NULL, false, false);
+ NULL, false, false, fp);
}
- printf(" ");
+ fprintf(fp, " ");
if (temp_write->temp_write.alignment) {
- print_reg(temp_write->temp_write.source >> 2, NULL);
+ print_reg(temp_write->temp_write.source >> 2, NULL, fp);
} else {
- print_source_scalar(temp_write->temp_write.source, NULL, false, false);
+ print_source_scalar(temp_write->temp_write.source, NULL, false, false, fp);
}
}
static void
-print_branch(void *code, unsigned offset)
-{
+print_branch(void *code, unsigned offset, FILE *fp)
+{
ppir_codegen_field_branch *branch = code;
if (branch->discard.word0 == PPIR_CODEGEN_DISCARD_WORD0 &&
branch->discard.word1 == PPIR_CODEGEN_DISCARD_WORD1 &&
branch->discard.word2 == PPIR_CODEGEN_DISCARD_WORD2) {
- printf("discard");
+ fprintf(fp, "discard");
return;
}
-
const char* cond[] = {
"nv", "lt", "eq", "le",
"gt", "ne", "ge", "" ,
@@ -734,18 +732,18 @@ print_branch(void *code, unsigned offset)
cond_mask |= (branch->branch.cond_lt ? 1 : 0);
cond_mask |= (branch->branch.cond_eq ? 2 : 0);
cond_mask |= (branch->branch.cond_gt ? 4 : 0);
- printf("branch");
+ fprintf(fp, "branch");
if (cond_mask != 0x7) {
- printf(".%s ", cond[cond_mask]);
- print_source_scalar(branch->branch.arg0_source, NULL, false, false);
- printf(" ");
- print_source_scalar(branch->branch.arg1_source, NULL, false, false);
+ fprintf(fp, ".%s ", cond[cond_mask]);
+ print_source_scalar(branch->branch.arg0_source, NULL, false, false, fp);
+ fprintf(fp, " ");
+ print_source_scalar(branch->branch.arg1_source, NULL, false, false, fp);
}
- printf(" %d", branch->branch.target + offset);
+ fprintf(fp, " %d", branch->branch.target + offset);
}
-typedef void (*print_field_func)(void *, unsigned);
+typedef void (*print_field_func)(void *, unsigned, FILE *);
static const print_field_func print_field[ppir_codegen_field_shift_count] = {
[ppir_codegen_field_shift_varying] = print_varying,
@@ -767,29 +765,29 @@ static const int ppir_codegen_field_size[] = {
};
static void
-bitcopy(char *src, char *dst, unsigned bits, unsigned src_offset)
+bitcopy(unsigned char *src, unsigned char *dst, unsigned bits, unsigned src_offset)
{
src += src_offset / 8;
src_offset %= 8;
- for (int b = bits; b > 0; b -= 8, src++, dst++) {
- unsigned char out = ((unsigned char) *src) >> src_offset;
+ for (unsigned b = bits; b > 0; b -= MIN2(b, 8), src++, dst++) {
+ unsigned char out = *src >> src_offset;
if (src_offset > 0 && src_offset + b > 8)
- out |= ((unsigned char) *(src + 1)) << (8 - src_offset);
- *dst = (char) out;
+ out |= *(src + 1) << (8 - src_offset);
+ *dst = out;
}
}
void
-ppir_disassemble_instr(uint32_t *instr, unsigned offset)
+ppir_disassemble_instr(uint32_t *instr, unsigned offset, FILE *fp)
{
ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *) instr;
- char *instr_code = (char *) (instr + 1);
+ unsigned char *instr_code = (unsigned char *) (instr + 1);
unsigned bit_offset = 0;
bool first = true;
for (unsigned i = 0; i < ppir_codegen_field_shift_count; i++) {
- char code[12];
+ unsigned char code[12];
if (!((ctrl->fields >> i) & 1))
continue;
@@ -800,18 +798,18 @@ ppir_disassemble_instr(uint32_t *instr, unsigned offset)
if (first)
first = false;
else
- printf(", ");
+ fprintf(fp, ", ");
- print_field[i](code, offset);
+ print_field[i](code, offset, fp);
bit_offset += bits;
}
if (ctrl->sync)
- printf(", sync");
+ fprintf(fp, ", sync");
if (ctrl->stop)
- printf(", stop");
+ fprintf(fp, ", stop");
- printf("\n");
+ fprintf(fp, "\n");
}
diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c
index 8e1bc95158d..707055c48ac 100644
--- a/src/gallium/drivers/lima/ir/pp/instr.c
+++ b/src/gallium/drivers/lima/ir/pp/instr.c
@@ -186,18 +186,17 @@ bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node)
uint8_t swizzle[4] = {0};
if (ppir_instr_insert_const(&ic, nc, swizzle)) {
+ instr->constant[i] = ic;
ppir_node *succ = ppir_node_first_succ(node);
- ppir_src *src = NULL;
for (int s = 0; s < ppir_node_get_src_num(succ); s++) {
- src = ppir_node_get_src(succ, s);
- if (src->node == node)
- break;
- }
- assert(src->node == node);
+ ppir_src *src = ppir_node_get_src(succ, s);
+ assert(src);
+ if (src->node != node)
+ continue;
- instr->constant[i] = ic;
- ppir_update_src_pipeline(ppir_pipeline_reg_const0 + i, src,
- &c->dest, swizzle);
+ ppir_update_src_pipeline(ppir_pipeline_reg_const0 + i, src,
+ &c->dest, swizzle);
+ }
break;
}
}
@@ -284,7 +283,7 @@ void ppir_instr_print_list(ppir_compiler *comp)
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
printf("-------block %3d-------\n", block->index);
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
- printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index);
+ printf("%c%03d: ", instr->stop ? '*' : ' ', instr->index);
for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
ppir_node *node = instr->slots[i];
if (node)
diff --git a/src/gallium/drivers/lima/ir/pp/liveness.c b/src/gallium/drivers/lima/ir/pp/liveness.c
index 1799a53b165..02faa423b87 100644
--- a/src/gallium/drivers/lima/ir/pp/liveness.c
+++ b/src/gallium/drivers/lima/ir/pp/liveness.c
@@ -121,7 +121,7 @@ ppir_liveness_instr_srcs(ppir_compiler *comp, ppir_instr *instr)
/* Update the liveness information of the instruction by removing its
* dests from the live_in set. */
static void
-ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
+ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr, ppir_instr *last)
{
for (int i = PPIR_INSTR_SLOT_NUM-1; i >= 0; i--) {
ppir_node *node = instr->slots[i];
@@ -146,9 +146,18 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
unsigned int index = reg->regalloc_index;
bool live = BITSET_TEST(instr->live_set, index);
+ /* If it's an out reg, it's alive till the end of the block, so add it
+ * to live_set of the last instruction */
+ if (!live && reg->out_reg && (instr != last)) {
+ BITSET_SET(last->live_set, index);
+ BITSET_CLEAR(instr->live_set, index);
+ continue;
+ }
+
/* If a register is written but wasn't read in a later instruction, it is
- * either dead code or a bug. For now, assign an interference to it to
- * ensure it doesn't get assigned a live register and overwrites it. */
+ * either an output register in the last instruction, dead code or a bug.
+ * For now, assign an interference to it to ensure it doesn't get assigned
+ * a live register and overwrite it. */
if (!live) {
BITSET_SET(instr->live_internal, index);
continue;
@@ -224,13 +233,13 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
}
}
else {
- ppir_instr *next_instr = LIST_ENTRY(ppir_instr, instr->list.next, list);
+ ppir_instr *next_instr = list_entry(instr->list.next, ppir_instr, list);
ppir_liveness_propagate(comp,
instr->live_set, next_instr->live_set,
instr->live_mask, next_instr->live_mask);
}
- ppir_liveness_instr_dest(comp, instr);
+ ppir_liveness_instr_dest(comp, instr, last);
ppir_liveness_instr_srcs(comp, instr);
cont |= !ppir_liveness_set_equal(comp,
diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c
index deed1c7f2c9..ecc19b79c2c 100644
--- a/src/gallium/drivers/lima/ir/pp/lower.c
+++ b/src/gallium/drivers/lima/ir/pp/lower.c
@@ -98,7 +98,8 @@ static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
static bool ppir_lower_load(ppir_block *block, ppir_node *node)
{
ppir_dest *dest = ppir_node_get_dest(node);
- if (ppir_node_is_root(node) && dest->type == ppir_target_ssa) {
+ if (ppir_node_is_root(node) && !node->succ_different_block &&
+ dest->type == ppir_target_ssa) {
ppir_node_delete(node);
return true;
}
@@ -107,7 +108,8 @@ static bool ppir_lower_load(ppir_block *block, ppir_node *node)
* that has load node in source
*/
if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&
- dest->type != ppir_target_register) {
+ !node->succ_different_block &&
+ dest->type != ppir_target_register) {
ppir_node *succ = ppir_node_first_succ(node);
switch (succ->type) {
case ppir_node_type_alu:
@@ -322,6 +324,98 @@ static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
return true;
}
+static bool ppir_lower_branch_merge_condition(ppir_block *block, ppir_node *node)
+{
+ /* Check if we can merge a condition with a branch instruction,
+ * removing the need for a select instruction */
+ assert(node->type == ppir_node_type_branch);
+
+ if (!ppir_node_has_single_pred(node))
+ return false;
+
+ ppir_node *pred = ppir_node_first_pred(node);
+ assert(pred);
+
+ if (pred->type != ppir_node_type_alu)
+ return false;
+
+ switch (pred->op)
+ {
+ case ppir_op_lt:
+ case ppir_op_gt:
+ case ppir_op_le:
+ case ppir_op_ge:
+ case ppir_op_eq:
+ case ppir_op_ne:
+ break;
+ default:
+ return false;
+ }
+
+ ppir_dest *dest = ppir_node_get_dest(pred);
+ if (!ppir_node_has_single_succ(pred) || dest->type != ppir_target_ssa)
+ return false;
+
+ ppir_alu_node *cond = ppir_node_to_alu(pred);
+ /* branch can't reference pipeline registers */
+ if (cond->src[0].type == ppir_target_pipeline ||
+ cond->src[1].type == ppir_target_pipeline)
+ return false;
+
+ /* branch can't use flags */
+ if (cond->src[0].negate || cond->src[0].absolute ||
+ cond->src[1].negate || cond->src[1].absolute)
+ return false;
+
+ /* at this point, it can successfully be replaced. */
+ ppir_branch_node *branch = ppir_node_to_branch(node);
+ switch (pred->op)
+ {
+ case ppir_op_le:
+ branch->cond_gt = true;
+ break;
+ case ppir_op_lt:
+ branch->cond_eq = true;
+ branch->cond_gt = true;
+ break;
+ case ppir_op_ge:
+ branch->cond_lt = true;
+ break;
+ case ppir_op_gt:
+ branch->cond_eq = true;
+ branch->cond_lt = true;
+ break;
+ case ppir_op_eq:
+ branch->cond_lt = true;
+ branch->cond_gt = true;
+ break;
+ case ppir_op_ne:
+ branch->cond_eq = true;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ assert(cond->num_src == 2);
+
+ branch->num_src = 2;
+ branch->src[0] = cond->src[0];
+ branch->src[1] = cond->src[1];
+
+ /* for all nodes before the condition */
+ ppir_node_foreach_pred_safe(pred, dep) {
+ /* insert the branch node as successor */
+ ppir_node *p = dep->pred;
+ ppir_node_remove_dep(dep);
+ ppir_node_add_dep(node, p, ppir_dep_src);
+ }
+
+ ppir_node_delete(pred);
+
+ return true;
+}
+
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
ppir_branch_node *branch = ppir_node_to_branch(node);
@@ -330,6 +424,12 @@ static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
if (branch->num_src == 0)
return true;
+ /* Check if we can merge a condition with the branch */
+ if (ppir_lower_branch_merge_condition(block, node))
+ return true;
+
+ /* If the condition cannot be merged, fall back to a
+ * comparison against zero */
ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
if (!zero)
@@ -342,11 +442,6 @@ static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
zero->dest.ssa.num_components = 1;
zero->dest.write_mask = 0x01;
- /* For now we're just comparing branch condition with 0,
- * in future we should look whether it's possible to move
- * comparision node into branch itself and use current
- * way as a fallback for complex conditions.
- */
ppir_node_target_assign(&branch->src[1], &zero->node);
if (branch->negate)
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 5d2d2282233..517ec628b41 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -29,11 +29,12 @@
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"
+#include "nir_legacy.h"
#include "ppir.h"
-static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
+static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa)
{
ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
if (!node)
@@ -52,16 +53,16 @@ static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ss
}
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
- nir_register *reg, unsigned mask)
+ nir_def *def, unsigned mask)
{
- ppir_node *node = ppir_node_create(block, op, reg->index, mask);
+ ppir_node *node = ppir_node_create(block, op, def->index, mask);
if (!node)
return NULL;
ppir_dest *dest = ppir_node_get_dest(node);
list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
- if (r->index == reg->index) {
+ if (r->index == def->index) {
dest->reg = r;
break;
}
@@ -78,22 +79,22 @@ static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
}
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
- nir_dest *dest, unsigned mask)
+ nir_legacy_dest *dest, unsigned mask)
{
unsigned index = -1;
if (dest) {
if (dest->is_ssa)
- return ppir_node_create_ssa(block, op, &dest->ssa);
+ return ppir_node_create_ssa(block, op, dest->ssa);
else
- return ppir_node_create_reg(block, op, dest->reg.reg, mask);
+ return ppir_node_create_reg(block, op, dest->reg.handle, mask);
}
return ppir_node_create(block, op, index, 0);
}
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
- ppir_src *ps, nir_src *ns, unsigned mask)
+ ppir_src *ps, nir_legacy_src *ns, unsigned mask)
{
ppir_node *child = NULL;
@@ -103,15 +104,15 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
ppir_node_add_dep(node, child, ppir_dep_src);
}
else {
- nir_register *reg = ns->reg.reg;
+ nir_reg_src *rs = &ns->reg;
while (mask) {
int swizzle = ps->swizzle[u_bit_scan(&mask)];
- child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
+ child = comp->var_nodes[(rs->handle->index << 2) + swizzle];
/* Reg is read before it was written, create a dummy node for it */
if (!child) {
- child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
+ child = ppir_node_create_reg(node->block, ppir_op_dummy, rs->handle,
u_bit_consecutive(0, 4));
- comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
+ comp->var_nodes[(rs->handle->index << 2) + swizzle] = child;
}
/* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
if (child && node != child && child->op != ppir_op_dummy)
@@ -119,13 +120,11 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
}
}
+ assert(child);
ppir_node_target_assign(ps, child);
}
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
- /* not supported */
- [0 ... nir_last_opcode] = -1,
-
[nir_op_mov] = ppir_op_mov,
[nir_op_fmul] = ppir_op_mul,
[nir_op_fabs] = ppir_op_abs,
@@ -160,21 +159,38 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
nir_alu_instr *instr = nir_instr_as_alu(ni);
+ nir_def *def = &instr->def;
int op = nir_to_ppir_opcodes[instr->op];
- if (op < 0) {
+ if (op == ppir_op_unsupported) {
ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
return false;
}
+ nir_legacy_alu_dest legacy_dest = nir_legacy_chase_alu_dest(def);
+
+ /* Don't try to translate folded fsat since their source won't be valid */
+ if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr))
+ return true;
- ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
- instr->dest.write_mask);
+ /* Skip folded fabs/fneg since we do not have dead code elimination */
+ if ((instr->op == nir_op_fabs || instr->op == nir_op_fneg) &&
+ nir_legacy_float_mod_folds(instr)) {
+ /* Add parent node as the folded def node to keep
+ * the dependency chain */
+ nir_alu_src *ns = &instr->src[0];
+ ppir_node *parent = block->comp->var_nodes[ns->src.ssa->index];
+ assert(parent);
+ block->comp->var_nodes[def->index] = parent;
+ return true;
+ }
+
+ ppir_alu_node *node = ppir_node_create_dest(block, op, &legacy_dest.dest,
+ legacy_dest.write_mask);
if (!node)
return false;
ppir_dest *pd = &node->dest;
- nir_alu_dest *nd = &instr->dest;
- if (nd->saturate)
+ if (legacy_dest.fsat)
pd->modifier = ppir_outmod_clamp_fraction;
unsigned src_mask;
@@ -194,13 +210,13 @@ static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
node->num_src = num_child;
for (int i = 0; i < num_child; i++) {
- nir_alu_src *ns = instr->src + i;
+ nir_legacy_alu_src ns = nir_legacy_chase_alu_src(instr->src + i, true);
ppir_src *ps = node->src + i;
- memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
- ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
+ memcpy(ps->swizzle, ns.swizzle, sizeof(ps->swizzle));
+ ppir_node_add_src(block->comp, &node->node, ps, &ns.src, src_mask);
- ps->absolute = ns->abs;
- ps->negate = ns->negate;
+ ps->absolute = ns.fabs;
+ ps->negate = ns.fneg;
}
list_addtail(&node->node.list, &block->node_list);
@@ -244,8 +260,9 @@ static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
branch = ppir_node_to_branch(node);
/* second src and condition will be updated during lowering */
+ nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[0]);
ppir_node_add_src(block->comp, node, &branch->src[0],
- &instr->src[0], u_bit_consecutive(0, instr->num_components));
+ &legacy_src, u_bit_consecutive(0, instr->num_components));
branch->num_src = 1;
branch->target = comp->discard_block;
@@ -268,11 +285,22 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
ppir_alu_node *alu_node;
switch (instr->intrinsic) {
- case nir_intrinsic_load_input:
- if (!instr->dest.is_ssa)
- mask = u_bit_consecutive(0, instr->num_components);
+ case nir_intrinsic_decl_reg:
+ case nir_intrinsic_store_reg:
+ /* Nothing to do for these */
+ return true;
+
+ case nir_intrinsic_load_reg: {
+ nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
+ lnode = ppir_node_create_dest(block, ppir_op_dummy, &legacy_dest, mask);
+ return true;
+ }
+
+ case nir_intrinsic_load_input: {
+ mask = u_bit_consecutive(0, instr->num_components);
- lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
+ nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
+ lnode = ppir_node_create_dest(block, ppir_op_load_varying, &legacy_dest, mask);
if (!lnode)
return false;
@@ -282,16 +310,17 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
else {
lnode->num_src = 1;
- ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
+ nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
+ ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
}
list_addtail(&lnode->node.list, &block->node_list);
return true;
+ }
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_point_coord:
- case nir_intrinsic_load_front_face:
- if (!instr->dest.is_ssa)
- mask = u_bit_consecutive(0, instr->num_components);
+ case nir_intrinsic_load_front_face: {
+ mask = u_bit_consecutive(0, instr->num_components);
ppir_op op;
switch (instr->intrinsic) {
@@ -309,19 +338,21 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
break;
}
- lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
+ nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
+ lnode = ppir_node_create_dest(block, op, &legacy_dest, mask);
if (!lnode)
return false;
lnode->num_components = instr->num_components;
list_addtail(&lnode->node.list, &block->node_list);
return true;
+ }
- case nir_intrinsic_load_uniform:
- if (!instr->dest.is_ssa)
- mask = u_bit_consecutive(0, instr->num_components);
+ case nir_intrinsic_load_uniform: {
+ mask = u_bit_consecutive(0, instr->num_components);
- lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
+ nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
+ lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &legacy_dest, mask);
if (!lnode)
return false;
@@ -331,11 +362,13 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
else {
lnode->num_src = 1;
- ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
+ nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
+ ppir_node_add_src(block->comp, &lnode->node, &lnode->src, &legacy_src, 1);
}
list_addtail(&lnode->node.list, &block->node_list);
return true;
+ }
case nir_intrinsic_store_output: {
/* In simple cases where the store_output is ssa, that register
@@ -345,16 +378,36 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
* back to inserting a mov at the end.
* If the source node will only be able to output to pipeline
* registers, fall back to the mov as well. */
- if (!block->comp->uses_discard && instr->src->is_ssa) {
+ assert(nir_src_is_const(instr->src[1]) &&
+ "lima doesn't support indirect outputs");
+
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ unsigned offset = nir_src_as_uint(instr->src[1]);
+ unsigned slot = io.location + offset;
+ ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
+ block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
+ if (out_type == ppir_output_invalid) {
+ ppir_debug("Unsupported output type: %d\n", slot);
+ return false;
+ }
+
+ if (!block->comp->uses_discard) {
node = block->comp->var_nodes[instr->src->ssa->index];
+ assert(node);
switch (node->op) {
case ppir_op_load_uniform:
case ppir_op_load_texture:
+ case ppir_op_dummy:
case ppir_op_const:
break;
- default:
- node->is_end = 1;
+ default: {
+ ppir_dest *dest = ppir_node_get_dest(node);
+ dest->ssa.out_type = out_type;
+ dest->ssa.num_components = 4;
+ dest->write_mask = u_bit_consecutive(0, 4);
+ node->is_out = 1;
return true;
+ }
}
}
@@ -364,19 +417,21 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
dest->type = ppir_target_ssa;
- dest->ssa.num_components = instr->num_components;
+ dest->ssa.num_components = 4;
dest->ssa.index = 0;
- dest->write_mask = u_bit_consecutive(0, instr->num_components);
+ dest->write_mask = u_bit_consecutive(0, 4);
+ dest->ssa.out_type = out_type;
alu_node->num_src = 1;
for (int i = 0; i < instr->num_components; i++)
alu_node->src[0].swizzle[i] = i;
- ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
- u_bit_consecutive(0, instr->num_components));
+ nir_legacy_src legacy_src = nir_legacy_chase_src(instr->src);
+ ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, &legacy_src,
+ u_bit_consecutive(0, 4));
- alu_node->node.is_end = 1;
+ alu_node->node.is_out = 1;
list_addtail(&alu_node->node.list, &block->node_list);
return true;
@@ -418,7 +473,7 @@ static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
- nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
+ nir_undef_instr *undef = nir_instr_as_undef(ni);
ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
if (!node)
return false;
@@ -447,7 +502,9 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
}
switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_EXTERNAL:
@@ -460,10 +517,10 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
/* emit ld_tex node */
unsigned mask = 0;
- if (!instr->dest.is_ssa)
- mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));
+ mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));
- node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
+ nir_legacy_dest legacy_dest = nir_legacy_chase_dest(&instr->def);
+ node = ppir_node_create_dest(block, ppir_op_load_texture, &legacy_dest, mask);
if (!node)
return false;
@@ -473,23 +530,28 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
for (int i = 0; i < instr->coord_components; i++)
node->src[0].swizzle[i] = i;
+ bool perspective = false;
+
for (int i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
+ case nir_tex_src_backend1:
+ perspective = true;
+ FALLTHROUGH;
case nir_tex_src_coord: {
nir_src *ns = &instr->src[i].src;
- if (ns->is_ssa) {
- ppir_node *child = block->comp->var_nodes[ns->ssa->index];
- if (child->op == ppir_op_load_varying) {
- /* If the successor is load_texture, promote it to load_coords */
- nir_tex_src *nts = (nir_tex_src *)ns;
- if (nts->src_type == nir_tex_src_coord)
- child->op = ppir_op_load_coords;
- }
+ ppir_node *child = block->comp->var_nodes[ns->ssa->index];
+ if (child->op == ppir_op_load_varying) {
+ /* If the successor is load_texture, promote it to load_coords */
+ nir_tex_src *nts = (nir_tex_src *)ns;
+ if (nts->src_type == nir_tex_src_coord ||
+ nts->src_type == nir_tex_src_backend1)
+ child->op = ppir_op_load_coords;
}
/* src[0] is not used by the ld_tex instruction but ensures
* correct scheduling due to the pipeline dependency */
- ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
+ nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
+ ppir_node_add_src(block->comp, &node->node, &node->src[0], &legacy_src,
u_bit_consecutive(0, instr->coord_components));
node->num_src++;
break;
@@ -498,7 +560,8 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
case nir_tex_src_lod:
node->lod_bias_en = true;
node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
- ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
+ nir_legacy_src legacy_src = nir_legacy_chase_src(&instr->src[i].src);
+ ppir_node_add_src(block->comp, &node->node, &node->src[1], &legacy_src, 1);
node->num_src++;
break;
default:
@@ -526,13 +589,10 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
load->src = node->src[0];
load->num_src = 1;
- if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
- load->num_components = 3;
- else
- load->num_components = 2;
+ load->num_components = instr->coord_components;
ppir_debug("%s create load_coords node %d for %d\n",
- __FUNCTION__, load->index, node->node.index);
+ __func__, load->index, node->node.index);
ppir_node_foreach_pred_safe((&node->node), dep) {
ppir_node *pred = dep->pred;
@@ -543,6 +603,15 @@ static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
}
assert(load);
+
+ if (perspective) {
+ if (instr->coord_components == 3)
+ load->perspective = ppir_perspective_z;
+ else
+ load->perspective = ppir_perspective_w;
+ }
+
+ load->sampler_dim = instr->sampler_dim;
node->src[0].type = load->dest.type = ppir_target_pipeline;
node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;
@@ -598,7 +667,7 @@ static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) =
[nir_instr_type_alu] = ppir_emit_alu,
[nir_instr_type_intrinsic] = ppir_emit_intrinsic,
[nir_instr_type_load_const] = ppir_emit_load_const,
- [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
+ [nir_instr_type_undef] = ppir_emit_ssa_undef,
[nir_instr_type_tex] = ppir_emit_tex,
[nir_instr_type_jump] = ppir_emit_jump,
};
@@ -650,8 +719,9 @@ static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
if (!node)
return false;
else_branch = ppir_node_to_branch(node);
+ nir_legacy_src legacy_src = nir_legacy_chase_src(&if_stmt->condition);
ppir_node_add_src(block->comp, node, &else_branch->src[0],
- &if_stmt->condition, 1);
+ &legacy_src, 1);
else_branch->num_src = 1;
/* Negate condition to minimize branching. We're generating following:
* current_block: { ...; if (!statement) branch else_block; }
@@ -704,6 +774,7 @@ static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
+ assert(!nir_loop_has_continue_construct(nloop));
ppir_block *save_loop_cont_block = comp->loop_cont_block;
ppir_block *block;
ppir_branch_node *loop_branch;
@@ -769,10 +840,10 @@ static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
return true;
}
-static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
+static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa)
{
ppir_compiler *comp = rzalloc_size(
- prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
+ prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *));
if (!comp)
return NULL;
@@ -782,8 +853,8 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
comp->blocks = _mesa_hash_table_u64_create(prog);
comp->var_nodes = (ppir_node **)(comp + 1);
- comp->reg_base = num_ssa;
comp->prog = prog;
+
return comp;
}
@@ -819,7 +890,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
}
- if (node->is_end ||
+ if (node->is_out ||
node->op == ppir_op_discard ||
node->op == ppir_op_store_temp ||
node->op == ppir_op_branch) {
@@ -830,7 +901,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
}
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
- struct pipe_debug_callback *debug)
+ struct util_debug_callback *debug)
{
const struct shader_info *info = &nir->info;
char *shaderdb;
@@ -846,7 +917,7 @@ static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
if (lima_debug & LIMA_DEBUG_SHADERDB)
fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
- pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
+ util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
free(shaderdb);
}
@@ -876,22 +947,20 @@ static void ppir_add_write_after_read_deps(ppir_compiler *comp)
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
struct ra_regs *ra,
- struct pipe_debug_callback *debug)
+ struct util_debug_callback *debug)
{
nir_function_impl *func = nir_shader_get_entrypoint(nir);
- ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
+ ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc);
if (!comp)
return false;
comp->ra = ra;
comp->uses_discard = nir->info.fs.uses_discard;
+ comp->dual_source_blend = nir->info.fs.color_is_dual_source;
/* 1st pass: create ppir blocks */
- nir_foreach_function(function, nir) {
- if (!function->impl)
- continue;
-
- nir_foreach_block(nblock, function->impl) {
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(nblock, impl) {
ppir_block *block = ppir_block_create(comp);
if (!block)
return false;
@@ -901,11 +970,8 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n
}
/* 2nd pass: populate successors */
- nir_foreach_function(function, nir) {
- if (!function->impl)
- continue;
-
- nir_foreach_block(nblock, function->impl) {
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(nblock, impl) {
ppir_block *block = ppir_get_block(comp, nblock);
assert(block);
@@ -916,26 +982,19 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n
}
}
- /* Validate outputs, we support only gl_FragColor */
- nir_foreach_shader_out_variable(var, nir) {
- switch (var->data.location) {
- case FRAG_RESULT_COLOR:
- case FRAG_RESULT_DATA0:
- break;
- default:
- ppir_error("unsupported output type\n");
- goto err_out0;
- break;
- }
- }
+ comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
+
+ /* -1 means reg is not written by the shader */
+ for (int i = 0; i < ppir_output_num; i++)
+ comp->out_type_to_reg[i] = -1;
- foreach_list_typed(nir_register, reg, node, &func->registers) {
+ nir_foreach_reg_decl(decl, func) {
ppir_reg *r = rzalloc(comp, ppir_reg);
if (!r)
return false;
- r->index = reg->index;
- r->num_components = reg->num_components;
+ r->index = decl->def.index;
+ r->num_components = nir_intrinsic_num_components(decl);
r->is_head = false;
list_addtail(&r->list, &comp->reg_list);
comp->reg_num++;
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c
index 99d025e2c05..e22a06ce5ee 100644
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -29,6 +29,9 @@
#include "ppir.h"
const ppir_op_info ppir_op_infos[] = {
+ [ppir_op_unsupported] = {
+ .name = "unsupported",
+ },
[ppir_op_mov] = {
.name = "mov",
.slots = (int []) {
@@ -330,12 +333,14 @@ const ppir_op_info ppir_op_infos[] = {
.name = "undef",
.type = ppir_node_type_alu,
.slots = (int []) {
+ PPIR_INSTR_SLOT_END
},
},
[ppir_op_dummy] = {
.name = "dummy",
.type = ppir_node_type_alu,
.slots = (int []) {
+ PPIR_INSTR_SLOT_END
},
},
};
@@ -366,7 +371,7 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
if (mask) {
/* reg has 4 slots for each component write node */
while (mask)
- comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;
+ comp->var_nodes[(index << 2) + u_bit_scan(&mask)] = node;
snprintf(node->name, sizeof(node->name), "reg%d", index);
} else {
comp->var_nodes[index] = node;
@@ -618,9 +623,9 @@ static ppir_node *ppir_node_insert_mov_local(ppir_node *node)
ppir_node_add_dep(move, node, ppir_dep_src);
list_addtail(&move->list, &node->list);
- if (node->is_end) {
- node->is_end = false;
- move->is_end = true;
+ if (node->is_out) {
+ node->is_out = false;
+ move->is_out = true;
}
return move;
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index a54be74ccfc..ffe8c5af89d 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -58,6 +58,18 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node)
return ppir_instr_insert_node(succ->instr, node);
}
+ if (ppir_node_has_single_succ(node) &&
+ ppir_node_has_single_pred(ppir_node_first_succ(node)) &&
+ (ppir_node_first_succ(node)->type == ppir_node_type_branch)) {
+
+ assert(ppir_node_has_single_succ(node));
+ ppir_node *succ = ppir_node_first_succ(node);
+ assert(succ);
+ assert(succ->instr);
+
+ return ppir_instr_insert_node(succ->instr, node);
+ }
+
switch (node->type) {
case ppir_node_type_load:
break;
@@ -88,7 +100,8 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
* by using pipeline reg ^vmul/^fmul */
ppir_alu_node *alu = ppir_node_to_alu(node);
if (alu->dest.type == ppir_target_ssa &&
- ppir_node_has_single_succ(node)) {
+ ppir_node_has_single_succ(node) &&
+ ppir_node_has_single_src_succ(node)) {
ppir_node *succ = ppir_node_first_succ(node);
if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_VEC_ADD) {
node->instr_pos = PPIR_INSTR_SLOT_ALU_VEC_MUL;
@@ -202,7 +215,7 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
case ppir_node_type_discard:
if (!create_new_instr(block, node))
return false;
- node->instr->is_end = true;
+ block->stop = true;
break;
case ppir_node_type_branch:
if (!create_new_instr(block, node))
@@ -275,8 +288,13 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root)
if (!ppir_do_one_node_to_instr(block, node))
return false;
- if (node->is_end)
- node->instr->is_end = true;
+ /* The node writes an output register. We can't stop at this exact
+ * instruction because there may be another node that writes another
+ * output, so set the stop flag for the block. The stop flag is set on
+ * the last instruction of the block during codegen.
+ */
+ if (node->is_out)
+ block->stop = true;
ppir_node_foreach_pred(node, dep) {
ppir_node *pred = dep->pred;
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 480fca9e689..f9191a1c5d3 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -32,6 +32,7 @@
#include "ir/lima_ir.h"
typedef enum {
+ ppir_op_unsupported = 0,
ppir_op_mov,
ppir_op_abs,
ppir_op_neg,
@@ -161,7 +162,7 @@ typedef struct ppir_node {
struct ppir_instr *instr;
int instr_pos;
struct ppir_block *block;
- bool is_end;
+ bool is_out;
bool succ_different_block;
/* for scheduler */
@@ -179,9 +180,45 @@ typedef enum {
ppir_pipeline_reg_discard, /* varying load */
} ppir_pipeline;
+typedef enum { /* kinds of shader output, mapped from gl_frag_result by ppir_nir_output_to_ppir() */
+ ppir_output_color0,
+ ppir_output_color1,
+ ppir_output_depth,
+ ppir_output_num, /* count of the real output types above; sizes comp->out_type_to_reg */
+ ppir_output_invalid = -1, /* returned by ppir_nir_output_to_ppir() for unhandled slots */
+} ppir_output_type;
+/* Return a constant label for an output type (used by the regalloc
+ * result dump); any value without an explicit case yields "INVALID". */
+static inline const char *ppir_output_type_to_str(ppir_output_type type)
+{
+ switch (type) {
+ case ppir_output_color0:
+ return "OUTPUT_COLOR0";
+ case ppir_output_color1:
+ return "OUTPUT_COLOR1";
+ case ppir_output_depth:
+ return "OUTPUT_DEPTH";
+ default:
+ return "INVALID";
+ }
+}
+/* Translate a gl_frag_result slot into a ppir output type.
+ * FRAG_RESULT_COLOR and FRAG_RESULT_DATA0 map to ppir_output_color0 offset
+ * by dual_src_index, FRAG_RESULT_DEPTH maps to ppir_output_depth, and every
+ * other slot is reported as ppir_output_invalid.
+ * NOTE(review): dual_src_index is not range-checked here; presumably callers
+ * only pass 0 or 1 -- confirm. */
+static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res, int dual_src_index)
+{
+ switch (res) {
+ case FRAG_RESULT_COLOR:
+ case FRAG_RESULT_DATA0:
+ return ppir_output_color0 + dual_src_index;
+ case FRAG_RESULT_DEPTH:
+ return ppir_output_depth;
+ default:
+ return ppir_output_invalid;
+ }
+}
+
typedef struct ppir_reg {
struct list_head list;
int index;
+ ppir_output_type out_type;
int regalloc_index;
int num_components;
@@ -191,6 +228,7 @@ typedef struct ppir_reg {
bool is_head;
bool spilled;
bool undef;
+ bool out_reg;
} ppir_reg;
typedef enum {
@@ -252,6 +290,12 @@ typedef struct {
ppir_dest dest;
} ppir_const_node;
+typedef enum { /* perspective mode stored on a load node by ppir_emit_tex() */
+ ppir_perspective_none = 0,
+ ppir_perspective_z, /* chosen when a nir_tex_src_backend1 source is present and coord_components == 3 */
+ ppir_perspective_w, /* chosen when a nir_tex_src_backend1 source is present and coord_components != 3 */
+} ppir_perspective;
+
typedef struct {
ppir_node node;
int index;
@@ -259,6 +303,8 @@ typedef struct {
ppir_dest dest;
ppir_src src;
int num_src;
+ ppir_perspective perspective;
+ int sampler_dim;
} ppir_load_node;
typedef struct {
@@ -308,7 +354,7 @@ typedef struct ppir_instr {
ppir_node *slots[PPIR_INSTR_SLOT_NUM];
ppir_const constant[2];
- bool is_end;
+ bool stop;
/* for scheduler */
struct list_head succ_list;
@@ -332,6 +378,7 @@ typedef struct ppir_block {
struct list_head list;
struct list_head node_list;
struct list_head instr_list;
+ bool stop;
struct ppir_block *successors[2];
@@ -362,17 +409,18 @@ typedef struct ppir_compiler {
struct hash_table_u64 *blocks;
int cur_index;
int cur_instr_index;
+ int *out_type_to_reg;
struct list_head reg_list;
int reg_num;
/* array for searching ssa/reg node */
ppir_node **var_nodes;
- unsigned reg_base;
struct ra_regs *ra;
struct lima_fs_compiled_shader *prog;
bool uses_discard;
+ bool dual_source_blend;
/* for scheduler */
int sched_instr_base;
@@ -455,6 +503,7 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node)
static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
{
+ assert(node);
switch (node->type) {
case ppir_node_type_alu:
return &ppir_node_to_alu(node)->dest;
@@ -471,6 +520,7 @@ static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
static inline int ppir_node_get_src_num(ppir_node *node)
{
+ assert(node);
switch (node->type) {
case ppir_node_type_alu:
return ppir_node_to_alu(node)->num_src;
diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c
index 3ea136b5660..e80d468313b 100644
--- a/src/gallium/drivers/lima/ir/pp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/pp/regalloc.c
@@ -82,9 +82,6 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_node, node, &block->node_list, list) {
- if (node->is_end)
- continue;
-
if (!node->instr || node->op == ppir_op_const)
continue;
@@ -94,6 +91,8 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
if (dest->type == ppir_target_ssa) {
reg = &dest->ssa;
+ if (node->is_out)
+ reg->out_reg = true;
list_addtail(&reg->list, &comp->reg_list);
comp->reg_num++;
}
@@ -133,6 +132,14 @@ static void ppir_regalloc_print_result(ppir_compiler *comp)
}
}
printf("--------------------------\n");
+
+ printf("======ppir output regs======\n");
+ for (int i = 0; i < ppir_output_num; i++) {
+ if (comp->out_type_to_reg[i] != -1)
+ printf("%s: $%d\n", ppir_output_type_to_str(i),
+ (int)comp->out_type_to_reg[i]);
+ }
+ printf("--------------------------\n");
}
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
@@ -411,6 +418,7 @@ static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
* but not too much as to offset the num_components base cost. */
const float slot_scale = 1.1f;
+ memset(spill_costs, 0, sizeof(spill_costs[0]) * comp->reg_num);
list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
if (reg->spilled) {
/* not considered for spilling */
@@ -578,6 +586,11 @@ static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
n = 0;
list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
reg->index = ra_get_node_reg(g, n++);
+ if (reg->out_reg) {
+ /* We need actual reg number, we don't have swizzle for output regs */
+ assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle");
+ comp->out_type_to_reg[reg->out_type] = reg->index / 4;
+ }
}
ralloc_free(g);
@@ -604,8 +617,12 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
ppir_regalloc_update_reglist_ssa(comp);
/* No registers? Probably shader consists of discard instruction */
- if (list_is_empty(&comp->reg_list))
+ if (list_is_empty(&comp->reg_list)) {
+ comp->prog->state.frag_color0_reg = 0;
+ comp->prog->state.frag_color1_reg = -1;
+ comp->prog->state.frag_depth_reg = -1;
return true;
+ }
/* this will most likely succeed in the first
* try, except for very complicated shaders */
@@ -613,5 +630,12 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
if (!spilled)
return false;
+ comp->prog->state.frag_color0_reg =
+ comp->out_type_to_reg[ppir_output_color0];
+ comp->prog->state.frag_color1_reg =
+ comp->out_type_to_reg[ppir_output_color1];
+ comp->prog->state.frag_depth_reg =
+ comp->out_type_to_reg[ppir_output_depth];
+
return true;
}
diff --git a/src/gallium/drivers/lima/ir/pp/scheduler.c b/src/gallium/drivers/lima/ir/pp/scheduler.c
index 5e7a17c9bb9..b81e2d61936 100644
--- a/src/gallium/drivers/lima/ir/pp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/pp/scheduler.c
@@ -26,6 +26,10 @@
#include "ppir.h"
+/* qsort() comparator that orders two ints ascending.
+ * Returns a negative value, zero or a positive value when *a is less than,
+ * equal to or greater than *b. The operands are compared rather than
+ * subtracted, so the result cannot overflow for values far apart. */
+static int cmp_int(const void *a, const void *b)
+{
+ const int lhs = *(const int *)a;
+ const int rhs = *(const int *)b;
+
+ return (lhs > rhs) - (lhs < rhs);
+}
static void ppir_schedule_calc_sched_info(ppir_instr *instr)
{
@@ -62,15 +66,7 @@ static void ppir_schedule_calc_sched_info(ppir_instr *instr)
}
/* sort */
- for (i = 0; i < n - 1; i++) {
- for (int j = 0; j < n - i - 1; j++) {
- if (reg[j] > reg[j + 1]) {
- int tmp = reg[j + 1];
- reg[j + 1] = reg[j];
- reg[j] = tmp;
- }
- }
- }
+ qsort(reg, n, sizeof(reg[0]), cmp_int);
for (i = 0; i < n; i++) {
int pressure = reg[i] + n - (i + 1);
diff --git a/src/gallium/drivers/lima/lima_blit.c b/src/gallium/drivers/lima/lima_blit.c
new file mode 100644
index 00000000000..0da8ee71727
--- /dev/null
+++ b/src/gallium/drivers/lima/lima_blit.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (C) 2022 Lima Project
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "drm-uapi/lima_drm.h"
+
+#include "util/u_math.h"
+#include "util/format/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_inlines.h"
+#include "util/hash_table.h"
+
+#include "lima_context.h"
+#include "lima_gpu.h"
+#include "lima_resource.h"
+#include "lima_texture.h"
+#include "lima_format.h"
+#include "lima_job.h"
+#include "lima_screen.h"
+#include "lima_bo.h"
+#include "lima_parser.h"
+#include "lima_util.h"
+#include "lima_blit.h"
+/* Build the buffers and PLBU commands for one blit/reload draw.
+ *
+ * A stream buffer of lima_blit_buffer_size bytes is allocated on
+ * LIMA_PIPE_PP and filled, at the lima_blit_*_offset positions, with a
+ * render state that points at the screen's reload shader, a texture
+ * descriptor for psurf->texture at psurf's level and first layer, a
+ * one-entry texture array, three vertex positions taken from the dst box
+ * and three coordinate pairs taken from the src box. The draw commands
+ * are then appended to cmd_array.
+ *
+ * filter: texture filtering stays nearest unless this differs from
+ * PIPE_TEX_FILTER_NEAREST.
+ * scissor: when set, a scissor covering dst is emitted and the job's
+ * damage rect is unioned with it.
+ * sample_mask: shifted left by 12 and ORed into the multi_sample word.
+ * mrt_idx: forwarded to lima_texture_desc_set_res().
+ *
+ * NOTE(review): the pointer returned by lima_job_create_stream_bo() is
+ * used without a NULL check -- confirm it cannot fail.
+ */
+void
+lima_pack_blit_cmd(struct lima_job *job,
+ struct util_dynarray *cmd_array,
+ struct pipe_surface *psurf,
+ const struct pipe_box *src,
+ const struct pipe_box *dst,
+ unsigned filter,
+ bool scissor,
+ unsigned sample_mask,
+ unsigned mrt_idx)
+{
+ #define lima_blit_render_state_offset 0x0000
+ #define lima_blit_gl_pos_offset 0x0040
+ #define lima_blit_varying_offset 0x0080
+ #define lima_blit_tex_desc_offset 0x00c0
+ #define lima_blit_tex_array_offset 0x0100
+ #define lima_blit_buffer_size 0x0140
+
+ struct lima_context *ctx = job->ctx;
+ struct lima_surface *surf = lima_surface(psurf);
+ int level = psurf->u.tex.level;
+ unsigned first_layer = psurf->u.tex.first_layer;
+ float fb_width = dst->width, fb_height = dst->height;
+
+ uint32_t va;
+ void *cpu = lima_job_create_stream_bo(
+ job, LIMA_PIPE_PP, lima_blit_buffer_size, &va);
+
+ struct lima_screen *screen = lima_screen(ctx->base.screen);
+
+ uint32_t reload_shader_first_instr_size =
+ ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
+ uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
+
+ struct lima_render_state reload_render_state = {
+ .alpha_blend = 0xf03b1ad2,
+ .depth_test = 0x0000000e,
+ .depth_range = 0xffff0000,
+ .stencil_front = 0x00000007,
+ .stencil_back = 0x00000007,
+ .multi_sample = 0x00000007,
+ .shader_address = reload_shader_va | reload_shader_first_instr_size,
+ .varying_types = 0x00000001,
+ .textures_address = va + lima_blit_tex_array_offset,
+ .aux0 = 0x00004021,
+ .varyings_address = va + lima_blit_varying_offset,
+ };
+
+ reload_render_state.multi_sample |= (sample_mask << 12);
+ /* Viewport size comes from the job's framebuffer: cbuf if present,
+ * otherwise zsbuf. NOTE(review): both branches overwrite the dst-based
+ * initial values of fb_width/fb_height. */
+ if (job->key.cbuf) {
+ fb_width = job->key.cbuf->width;
+ fb_height = job->key.cbuf->height;
+ } else {
+ fb_width = job->key.zsbuf->width;
+ fb_height = job->key.zsbuf->height;
+ }
+ /* Depth/stencil formats: adjust blend, depth and stencil words
+ * according to the format and to surf->reload. */
+ if (util_format_is_depth_or_stencil(psurf->format)) {
+ reload_render_state.alpha_blend &= 0x0fffffff;
+ if (psurf->format != PIPE_FORMAT_Z16_UNORM)
+ reload_render_state.depth_test |= 0x400;
+ if (surf->reload & PIPE_CLEAR_DEPTH)
+ reload_render_state.depth_test |= 0x801;
+ if (surf->reload & PIPE_CLEAR_STENCIL) {
+ reload_render_state.depth_test |= 0x1000;
+ reload_render_state.stencil_front = 0x0000024f;
+ reload_render_state.stencil_back = 0x0000024f;
+ reload_render_state.stencil_test = 0x0000ffff;
+ }
+ }
+
+ memcpy(cpu + lima_blit_render_state_offset, &reload_render_state,
+ sizeof(reload_render_state));
+
+ lima_tex_desc *td = cpu + lima_blit_tex_desc_offset;
+ memset(td, 0, lima_min_tex_desc_size);
+ lima_texture_desc_set_res(ctx, td, psurf->texture, level, level,
+ first_layer, mrt_idx);
+ td->format = lima_format_get_texel_reload(psurf->format);
+ td->unnorm_coords = 1;
+ td->sampler_dim = LIMA_SAMPLER_DIM_2D;
+ td->min_img_filter_nearest = 1;
+ td->mag_img_filter_nearest = 1;
+ td->wrap_s = LIMA_TEX_WRAP_CLAMP_TO_EDGE;
+ td->wrap_t = LIMA_TEX_WRAP_CLAMP_TO_EDGE;
+ td->wrap_r = LIMA_TEX_WRAP_CLAMP_TO_EDGE;
+
+ if (filter != PIPE_TEX_FILTER_NEAREST) {
+ td->min_img_filter_nearest = 0;
+ td->mag_img_filter_nearest = 0;
+ }
+
+ uint32_t *ta = cpu + lima_blit_tex_array_offset;
+ ta[0] = va + lima_blit_tex_desc_offset;
+ /* Three vertices built from dst: (x + w, y), (x, y), (x, y + h). */
+ float reload_gl_pos[] = {
+ dst->x + dst->width, dst->y, 0, 1,
+ dst->x, dst->y, 0, 1,
+ dst->x, dst->y + dst->height, 0, 1,
+ };
+ memcpy(cpu + lima_blit_gl_pos_offset, reload_gl_pos,
+ sizeof(reload_gl_pos));
+ /* Matching coordinate pairs built from src, in the same vertex order. */
+ float reload_varying[] = {
+ src->x + src->width, src->y,
+ src->x, src->y,
+ src->x, src->y + src->height,
+ 0, 0, /* unused */
+ };
+ memcpy(cpu + lima_blit_varying_offset, reload_varying,
+ sizeof(reload_varying));
+
+ PLBU_CMD_BEGIN(cmd_array, scissor ? 22 : 20);
+
+ PLBU_CMD_VIEWPORT_LEFT(0);
+ PLBU_CMD_VIEWPORT_RIGHT(fui(fb_width));
+ PLBU_CMD_VIEWPORT_BOTTOM(0);
+ PLBU_CMD_VIEWPORT_TOP(fui(fb_height));
+
+ PLBU_CMD_RSW_VERTEX_ARRAY(
+ va + lima_blit_render_state_offset,
+ va + lima_blit_gl_pos_offset);
+
+
+ if (scissor) {
+ int minx = MIN2(dst->x, dst->x + dst->width);
+ int maxx = MAX2(dst->x, dst->x + dst->width);
+ int miny = MIN2(dst->y, dst->y + dst->height);
+ int maxy = MAX2(dst->y, dst->y + dst->height);
+
+ PLBU_CMD_SCISSORS(minx, maxx, miny, maxy);
+ lima_damage_rect_union(&job->damage_rect, minx, maxx, miny, maxy);
+ }
+
+ PLBU_CMD_UNKNOWN2();
+ PLBU_CMD_UNKNOWN1();
+
+ PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
+ PLBU_CMD_INDEXED_DEST(va + lima_blit_gl_pos_offset);
+ PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
+
+ PLBU_CMD_END();
+
+ lima_dump_command_stream_print(job->dump, cpu, lima_blit_buffer_size,
+ false, "blit plbu cmd at va %x\n", va);
+}
+/* Create a surface for mip level `level` of prsc through
+ * pctx->create_surface(), using the resource's own format and layer 0
+ * as both first and last layer. Returns whatever create_surface returns. */
+static struct pipe_surface *
+lima_get_blit_surface(struct pipe_context *pctx,
+ struct pipe_resource *prsc,
+ unsigned level)
+{
+ struct pipe_surface tmpl;
+
+ memset(&tmpl, 0, sizeof(tmpl));
+ tmpl.format = prsc->format;
+ tmpl.u.tex.level = level;
+ tmpl.u.tex.first_layer = 0;
+ tmpl.u.tex.last_layer = 0;
+
+ return pctx->create_surface(pctx, prsc, &tmpl);
+}
+
+/*
+ * Try to perform the blit described by `info` with a dedicated job.
+ *
+ * Returns false without submitting anything when the request is outside
+ * what this path handles (LIMA_DEBUG_NO_BLIT set, swizzled or unsupported
+ * formats, non-2D targets, negative box origins, depth != 1, scissored
+ * blits, or a mask that does not cover the buffers being copied), or when
+ * the blit surfaces or the job cannot be created.
+ * Returns true once the job has been handed to lima_do_job().
+ */
+bool
+lima_do_blit(struct pipe_context *pctx,
+             const struct pipe_blit_info *info)
+{
+   struct lima_context *ctx = lima_context(pctx);
+   /* Buffers copied by this blit; color unless the source is depth/stencil */
+   unsigned reload_flags = PIPE_CLEAR_COLOR0;
+   uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+                           PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
+
+   if (lima_debug & LIMA_DEBUG_NO_BLIT)
+      return false;
+
+   /* Blitting of swizzled formats (R and RG) isn't implemented yet */
+   if (memcmp(identity,
+              lima_format_get_texel_swizzle(info->src.resource->format),
+              sizeof(identity)))
+      return false;
+
+   if (memcmp(identity,
+              lima_format_get_texel_swizzle(info->dst.resource->format),
+              sizeof(identity)))
+      return false;
+
+   if (util_format_is_depth_or_stencil(info->src.resource->format)) {
+      const struct util_format_description *desc =
+         util_format_description(info->src.resource->format);
+      reload_flags = 0;
+      if (util_format_has_depth(desc))
+         reload_flags |= PIPE_CLEAR_DEPTH;
+      if (util_format_has_stencil(desc))
+         reload_flags |= PIPE_CLEAR_STENCIL;
+   }
+
+   if (!lima_format_pixel_supported(info->dst.resource->format))
+      return false;
+
+   if (!lima_format_texel_supported(info->src.resource->format))
+      return false;
+
+   if (info->dst.resource->target != PIPE_TEXTURE_2D ||
+       info->src.resource->target != PIPE_TEXTURE_2D)
+      return false;
+
+   if (info->dst.box.x < 0 || info->dst.box.y < 0 ||
+       info->src.box.x < 0 || info->src.box.y < 0)
+      return false;
+
+   if (info->src.box.depth != 1 ||
+       info->dst.box.depth != 1)
+      return false;
+
+   /* Scissored blit isn't implemented yet */
+   if (info->scissor_enable)
+      return false;
+
+   /* The requested mask must include every buffer we are about to copy */
+   if ((reload_flags & PIPE_CLEAR_COLOR) && !(info->mask & PIPE_MASK_RGBA))
+      return false;
+
+   if ((reload_flags & PIPE_CLEAR_DEPTH) && !(info->mask & PIPE_MASK_Z))
+      return false;
+
+   if ((reload_flags & PIPE_CLEAR_STENCIL) && !(info->mask & PIPE_MASK_S))
+      return false;
+
+   struct pipe_surface *dst_surf =
+      lima_get_blit_surface(pctx, info->dst.resource, info->dst.level);
+   struct pipe_surface *src_surf =
+      lima_get_blit_surface(pctx, info->src.resource, info->src.level);
+
+   /* Surface creation can fail; drop whichever one we got and bail out
+    * instead of dereferencing a NULL surface below. */
+   if (!dst_surf || !src_surf) {
+      pipe_surface_reference(&dst_surf, NULL);
+      pipe_surface_reference(&src_surf, NULL);
+      return false;
+   }
+
+   struct lima_surface *lima_dst_surf = lima_surface(dst_surf);
+
+   /* The destination surface is the job's only attachment: zsbuf for
+    * depth/stencil formats, cbuf otherwise. */
+   struct lima_job *job;
+
+   if (util_format_is_depth_or_stencil(info->dst.resource->format))
+      job = lima_job_get_with_fb(ctx, NULL, dst_surf);
+   else
+      job = lima_job_get_with_fb(ctx, dst_surf, NULL);
+
+   /* lima_job_get_with_fb() returns NULL when the job can't be created */
+   if (!job) {
+      pipe_surface_reference(&dst_surf, NULL);
+      pipe_surface_reference(&src_surf, NULL);
+      return false;
+   }
+
+   struct lima_resource *src_res = lima_resource(src_surf->texture);
+   struct lima_resource *dst_res = lima_resource(dst_surf->texture);
+
+   lima_flush_job_accessing_bo(ctx, src_res->bo, true);
+   lima_flush_job_accessing_bo(ctx, dst_res->bo, true);
+
+   lima_job_add_bo(job, LIMA_PIPE_PP, src_res->bo, LIMA_SUBMIT_BO_READ);
+   _mesa_hash_table_insert(ctx->write_jobs, &dst_res->base, job);
+   lima_job_add_bo(job, LIMA_PIPE_PP, dst_res->bo, LIMA_SUBMIT_BO_WRITE);
+
+   if (info->src.resource->nr_samples > 1) {
+      /* Multisampled source: one blit command per sample, each with a
+       * single-bit sample mask and the sample number as last argument. */
+      for (int i = 0; i < MIN2(info->src.resource->nr_samples, LIMA_MAX_SAMPLES); i++) {
+         lima_pack_blit_cmd(job, &job->plbu_cmd_array,
+                            src_surf, &info->src.box,
+                            &info->dst.box, info->filter, true,
+                            1 << i, i);
+      }
+   } else {
+      lima_pack_blit_cmd(job, &job->plbu_cmd_array,
+                         src_surf, &info->src.box,
+                         &info->dst.box, info->filter, true,
+                         0xf, 0);
+   }
+
+   bool tile_aligned = false;
+
+   /* The blit covers the whole destination surface ... */
+   if (info->dst.box.x == 0 && info->dst.box.y == 0 &&
+       info->dst.box.width == lima_dst_surf->base.width &&
+       info->dst.box.height == lima_dst_surf->base.height)
+      tile_aligned = true;
+
+   /* ... or its origin and size are all multiples of 16 */
+   if (info->dst.box.x % 16 == 0 && info->dst.box.y % 16 == 0 &&
+       info->dst.box.width % 16 == 0 && info->dst.box.height % 16 == 0)
+      tile_aligned = true;
+
+   /* Reload if dest is not aligned to tile boundaries */
+   if (!tile_aligned)
+      lima_dst_surf->reload = reload_flags;
+   else
+      lima_dst_surf->reload = 0;
+
+   /* NOTE(review): presumably selects which buffers the job writes back;
+    * confirm against the job's resolve handling. */
+   job->resolve = reload_flags;
+
+   lima_do_job(job);
+
+   pipe_surface_reference(&dst_surf, NULL);
+   pipe_surface_reference(&src_surf, NULL);
+
+   return true;
+}
diff --git a/src/gallium/drivers/lima/lima_blit.h b/src/gallium/drivers/lima/lima_blit.h
new file mode 100644
index 00000000000..54a404ec9a7
--- /dev/null
+++ b/src/gallium/drivers/lima/lima_blit.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2022 Lima Project
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#ifndef H_LIMA_BLIT
+#define H_LIMA_BLIT
+
+#include <stdbool.h>
+
+/* Forward declarations so this header can be included on its own: a struct
+ * tag first seen inside a prototype is only visible within that prototype. */
+struct lima_job;
+struct pipe_blit_info;
+struct pipe_box;
+struct pipe_context;
+struct pipe_surface;
+struct util_dynarray;
+
+/*
+ * Emit the state and PLBU commands for one blit rectangle into `cmd`.
+ *
+ * job:     job the blit is recorded for
+ * cmd:     PLBU command array the commands are appended to
+ * psurf:   surface used as the blit source
+ * src:     source rectangle
+ * dst:     destination rectangle
+ * filter:  PIPE_TEX_FILTER_*; any value other than NEAREST turns off
+ *          nearest filtering on the source texture
+ * scissor: when true, a scissor covering `dst` is emitted and the job's
+ *          damage rectangle is extended by it
+ * sample_mask, mrt_idx:
+ *          NOTE(review): exact hardware meaning not visible here; callers
+ *          pass (1 << i, i) per sample for multisampled surfaces and
+ *          (0xf, 0) otherwise.
+ */
+void
+lima_pack_blit_cmd(struct lima_job *job,
+                   struct util_dynarray *cmd,
+                   struct pipe_surface *psurf,
+                   const struct pipe_box *src,
+                   const struct pipe_box *dst,
+                   unsigned filter,
+                   bool scissor,
+                   unsigned sample_mask,
+                   unsigned mrt_idx);
+
+/*
+ * Try to execute `blit_info` with a dedicated job. Returns true when the
+ * blit was submitted, false when this path does not handle the request.
+ */
+bool lima_do_blit(struct pipe_context *ctx,
+                  const struct pipe_blit_info *blit_info);
+
+#endif
+
diff --git a/src/gallium/drivers/lima/lima_bo.c b/src/gallium/drivers/lima/lima_bo.c
index 9e6d46b05de..e2daba76b69 100644
--- a/src/gallium/drivers/lima/lima_bo.c
+++ b/src/gallium/drivers/lima/lima_bo.c
@@ -32,7 +32,7 @@
#include "util/u_hash_table.h"
#include "util/u_math.h"
#include "util/os_time.h"
-#include "os/os_mman.h"
+#include "util/os_mman.h"
#include "frontend/drm_driver.h"
diff --git a/src/gallium/drivers/lima/lima_context.c b/src/gallium/drivers/lima/lima_context.c
index e14f5ae0fb1..802f308f48e 100644
--- a/src/gallium/drivers/lima/lima_context.c
+++ b/src/gallium/drivers/lima/lima_context.c
@@ -29,6 +29,7 @@
#include "util/u_debug.h"
#include "util/ralloc.h"
#include "util/u_inlines.h"
+#include "util/u_debug_cb.h"
#include "util/hash_table.h"
#include "lima_screen.h"
@@ -138,13 +139,15 @@ lima_context_destroy(struct pipe_context *pctx)
struct lima_context *ctx = lima_context(pctx);
struct lima_screen *screen = lima_screen(pctx->screen);
- lima_job_fini(ctx);
+ if (ctx->jobs)
+ lima_job_fini(ctx);
for (int i = 0; i < lima_ctx_buff_num; i++)
pipe_resource_reference(&ctx->buffer_state[i].res, NULL);
lima_program_fini(ctx);
lima_state_fini(ctx);
+ util_unreference_framebuffer_state(&ctx->framebuffer.base);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
@@ -187,25 +190,13 @@ plb_pp_stream_compare(const void *key1, const void *key2)
return memcmp(key1, key2, sizeof(struct lima_ctx_plb_pp_stream_key)) == 0;
}
-static void
-lima_set_debug_callback(struct pipe_context *pctx,
- const struct pipe_debug_callback *cb)
-{
- struct lima_context *ctx = lima_context(pctx);
-
- if (cb)
- ctx->debug = *cb;
- else
- memset(&ctx->debug, 0, sizeof(ctx->debug));
-}
-
struct pipe_context *
lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct lima_screen *screen = lima_screen(pscreen);
struct lima_context *ctx;
- ctx = rzalloc(screen, struct lima_context);
+ ctx = rzalloc(NULL, struct lima_context);
if (!ctx)
return NULL;
@@ -215,9 +206,11 @@ lima_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
return NULL;
}
+ ctx->sample_mask = (1 << LIMA_MAX_SAMPLES) - 1;
+
ctx->base.screen = pscreen;
ctx->base.destroy = lima_context_destroy;
- ctx->base.set_debug_callback = lima_set_debug_callback;
+ ctx->base.set_debug_callback = u_default_set_debug_callback;
ctx->base.invalidate_resource = lima_invalidate_resource;
lima_resource_context_init(ctx);
diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h
index 86a668cb4dc..cd753660623 100644
--- a/src/gallium/drivers/lima/lima_context.h
+++ b/src/gallium/drivers/lima/lima_context.h
@@ -27,6 +27,7 @@
#include "util/list.h"
#include "util/slab.h"
+#include "util/u_debug.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
@@ -49,6 +50,9 @@ struct lima_fs_compiled_shader {
struct {
int shader_size;
int stack_size;
+ int frag_color0_reg;
+ int frag_color1_reg;
+ int frag_depth_reg;
bool uses_discard;
} state;
};
@@ -200,6 +204,7 @@ struct lima_context {
LIMA_CONTEXT_DIRTY_CLIP = (1 << 15),
LIMA_CONTEXT_DIRTY_UNCOMPILED_VS = (1 << 16),
LIMA_CONTEXT_DIRTY_UNCOMPILED_FS = (1 << 17),
+ LIMA_CONTEXT_DIRTY_SAMPLE_MASK = (1 << 18),
} dirty;
struct u_upload_mgr *uploader;
@@ -209,6 +214,8 @@ struct lima_context {
struct lima_context_framebuffer framebuffer;
struct lima_context_viewport_state viewport;
+ /* input for PLBU_CMD_VIEWPORT_* */
+ struct lima_context_viewport_state ext_viewport;
struct pipe_scissor_state scissor;
struct pipe_scissor_state clipped_scissor;
struct lima_vs_compiled_shader *vs;
@@ -227,6 +234,9 @@ struct lima_context {
struct lima_texture_stateobj tex_stateobj;
struct lima_pp_stream_state pp_stream;
+ #define LIMA_MAX_SAMPLES 4
+ unsigned sample_mask;
+
unsigned min_index;
unsigned max_index;
@@ -270,8 +280,6 @@ struct lima_context {
int id;
- struct pipe_debug_callback debug;
-
unsigned index_offset;
struct lima_resource *index_res;
};
diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c
index 161fc7288a5..e64f1e88c1a 100644
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c
@@ -85,6 +85,32 @@ lima_clip_scissor_to_viewport(struct lima_context *ctx)
cscissor->miny = cscissor->maxy;
}
+/*
+ * Compute ctx->ext_viewport, the viewport fed to PLBU_CMD_VIEWPORT_*.
+ * It equals ctx->viewport, except for MESA_PRIM_LINES draws with a
+ * rasterizer bound and a line width other than 1.0, where every edge is
+ * pushed outwards by half the line width.
+ */
+static void
+lima_extend_viewport(struct lima_context *ctx, const struct pipe_draw_info *info)
+{
+   const struct lima_context_viewport_state *vp = &ctx->viewport;
+   struct lima_context_viewport_state *ext = &ctx->ext_viewport;
+
+   /* Start from the unmodified viewport on every call */
+   ext->left = vp->left;
+   ext->right = vp->right;
+   ext->bottom = vp->bottom;
+   ext->top = vp->top;
+
+   if (info->mode != MESA_PRIM_LINES || !ctx->rasterizer)
+      return;
+
+   float width = ctx->rasterizer->base.line_width;
+
+   if (width != 1.0f) {
+      float half = width / 2;
+
+      ext->left = vp->left - half;
+      ext->right = vp->right + half;
+      ext->bottom = vp->bottom - half;
+      ext->top = vp->top + half;
+   }
+}
+
static bool
lima_is_scissor_zero(struct lima_context *ctx)
{
@@ -121,17 +147,6 @@ lima_update_job_wb(struct lima_context *ctx, unsigned buffers)
}
static void
-lima_damage_rect_union(struct pipe_scissor_state *rect,
- unsigned minx, unsigned maxx,
- unsigned miny, unsigned maxy)
-{
- rect->minx = MIN2(rect->minx, minx);
- rect->miny = MIN2(rect->miny, miny);
- rect->maxx = MAX2(rect->maxx, maxx);
- rect->maxy = MAX2(rect->maxy, maxy);
-}
-
-static void
lima_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state,
const union pipe_color_union *color, double depth, unsigned stencil)
{
@@ -327,10 +342,10 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info,
struct lima_job *job = lima_job_get(ctx);
PLBU_CMD_BEGIN(&job->plbu_cmd_array, 32);
- PLBU_CMD_VIEWPORT_LEFT(fui(ctx->viewport.left));
- PLBU_CMD_VIEWPORT_RIGHT(fui(ctx->viewport.right));
- PLBU_CMD_VIEWPORT_BOTTOM(fui(ctx->viewport.bottom));
- PLBU_CMD_VIEWPORT_TOP(fui(ctx->viewport.top));
+ PLBU_CMD_VIEWPORT_LEFT(fui(ctx->ext_viewport.left));
+ PLBU_CMD_VIEWPORT_RIGHT(fui(ctx->ext_viewport.right));
+ PLBU_CMD_VIEWPORT_BOTTOM(fui(ctx->ext_viewport.bottom));
+ PLBU_CMD_VIEWPORT_TOP(fui(ctx->ext_viewport.top));
if (!info->index_size)
PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN();
@@ -348,11 +363,11 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info,
}
/* Specify point size with PLBU command if shader doesn't write */
- if (info->mode == PIPE_PRIM_POINTS && ctx->vs->state.point_size_idx == -1)
+ if (info->mode == MESA_PRIM_POINTS && ctx->vs->state.point_size_idx == -1)
force_point_size = true;
/* Specify line width with PLBU command for lines */
- if (info->mode > PIPE_PRIM_POINTS && info->mode < PIPE_PRIM_TRIANGLES)
+ if (info->mode > MESA_PRIM_POINTS && info->mode < MESA_PRIM_TRIANGLES)
force_point_size = true;
PLBU_CMD_PRIMITIVE_SETUP(force_point_size, cull, info->index_size);
@@ -377,10 +392,10 @@ lima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info,
PLBU_CMD_DEPTH_RANGE_NEAR(fui(ctx->viewport.near));
PLBU_CMD_DEPTH_RANGE_FAR(fui(ctx->viewport.far));
- if ((info->mode == PIPE_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) ||
- ((info->mode >= PIPE_PRIM_LINES) && (info->mode < PIPE_PRIM_TRIANGLES)))
+ if ((info->mode == MESA_PRIM_POINTS && ctx->vs->state.point_size_idx == -1) ||
+ ((info->mode >= MESA_PRIM_LINES) && (info->mode < MESA_PRIM_TRIANGLES)))
{
- uint32_t v = info->mode == PIPE_PRIM_POINTS ?
+ uint32_t v = info->mode == MESA_PRIM_POINTS ?
fui(ctx->rasterizer->base.point_size) : fui(ctx->rasterizer->base.line_width);
PLBU_CMD_LOW_PRIM_SIZE(v);
}
@@ -424,106 +439,55 @@ lima_blend_func(enum pipe_blend_func pipe)
}
static int
-lima_blend_factor_has_alpha(enum pipe_blendfactor pipe)
-{
- /* Bit 4 is set if the blendfactor uses alpha */
- switch (pipe) {
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return 1;
-
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_ONE:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return 0;
-
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return -1; /* not supported */
- }
- return -1;
-}
-
-static int
-lima_blend_factor_is_inv(enum pipe_blendfactor pipe)
-{
- /* Bit 3 is set if the blendfactor type is inverted */
- switch (pipe) {
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- case PIPE_BLENDFACTOR_ONE:
- return 1;
-
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_DST_ALPHA:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- case PIPE_BLENDFACTOR_ZERO:
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return 0;
-
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return -1; /* not supported */
- }
- return -1;
-}
-
-static int
lima_blend_factor(enum pipe_blendfactor pipe)
{
- /* Bits 0-2 indicate the blendfactor type */
+ /* Bits 0-2 indicate the blendfactor type,
+ * Bit 3 is set if blendfactor is inverted
+ * Bit 4 is set if blendfactor has alpha */
switch (pipe) {
case PIPE_BLENDFACTOR_SRC_COLOR:
+ return 0 << 4 | 0 << 3 | 0;
case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return 1 << 4 | 0 << 3 | 0;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return 0 << 4 | 1 << 3 | 0;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return 0;
+ return 1 << 4 | 1 << 3 | 0;
case PIPE_BLENDFACTOR_DST_COLOR:
+ return 0 << 4 | 0 << 3 | 1;
case PIPE_BLENDFACTOR_DST_ALPHA:
+ return 1 << 4 | 0 << 3 | 1;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return 0 << 4 | 1 << 3 | 1;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return 1;
+ return 1 << 4 | 1 << 3 | 1;
case PIPE_BLENDFACTOR_CONST_COLOR:
+ return 0 << 4 | 0 << 3 | 2;
case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return 1 << 4 | 0 << 3 | 2;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return 0 << 4 | 1 << 3 | 2;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return 2;
+ return 1 << 4 | 1 << 3 | 2;
case PIPE_BLENDFACTOR_ZERO:
+ return 0 << 4 | 0 << 3 | 3;
case PIPE_BLENDFACTOR_ONE:
- return 3;
+ return 0 << 4 | 1 << 3 | 3;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- return 4;
+ return 0 << 4 | 0 << 3 | 4;
case PIPE_BLENDFACTOR_SRC1_COLOR:
+ return 0 << 4 | 0 << 3 | 5;
case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ return 1 << 4 | 0 << 3 | 5;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return 0 << 4 | 1 << 3 | 5;
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- return -1; /* not supported */
+ return 1 << 4 | 1 << 3 | 5;
}
return -1;
}
@@ -534,27 +498,37 @@ lima_calculate_alpha_blend(enum pipe_blend_func rgb_func, enum pipe_blend_func a
enum pipe_blendfactor alpha_src_factor, enum pipe_blendfactor alpha_dst_factor)
{
/* PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE has to be changed to PIPE_BLENDFACTOR_ONE
- * if it is set for alpha_src.
+ * if it is set for alpha_src or alpha_dst.
*/
if (alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ if (alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+ alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+
+ /* MIN and MAX ops actually do OP(As * S + Ad * D, Ad), so
+ * we need to set S to 1 and D to 0 to get correct result */
+ if (alpha_func == PIPE_BLEND_MIN ||
+ alpha_func == PIPE_BLEND_MAX) {
+ alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ }
+
+ /* MIN and MAX ops actually do OP(Cs * S + Cd * D, Cd), so
+ * we need to set S to 1 and D to 0 to get correct result */
+ if (rgb_func == PIPE_BLEND_MIN ||
+ rgb_func == PIPE_BLEND_MAX) {
+ rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ }
+
return lima_blend_func(rgb_func) |
(lima_blend_func(alpha_func) << 3) |
-
(lima_blend_factor(rgb_src_factor) << 6) |
- (lima_blend_factor_is_inv(rgb_src_factor) << 9) |
- (lima_blend_factor_has_alpha(rgb_src_factor) << 10) |
-
(lima_blend_factor(rgb_dst_factor) << 11) |
- (lima_blend_factor_is_inv(rgb_dst_factor) << 14) |
- (lima_blend_factor_has_alpha(rgb_dst_factor) << 15) |
-
- (lima_blend_factor(alpha_src_factor) << 16) |
- (lima_blend_factor_is_inv(alpha_src_factor) << 19) |
-
- (lima_blend_factor(alpha_dst_factor) << 20) |
- (lima_blend_factor_is_inv(alpha_dst_factor) << 23) |
+ /* alpha_src and alpha_dst are 4 bit, so need to mask 5th bit */
+ ((lima_blend_factor(alpha_src_factor) & 0xf) << 16) |
+ ((lima_blend_factor(alpha_dst_factor) & 0xf) << 20) |
0x0C000000; /* need to check if this is GLESv1 glAlphaFunc */
}
@@ -600,8 +574,7 @@ lima_calculate_depth_test(struct pipe_depth_stencil_alpha_state *depth,
return (depth->depth_enabled && depth->depth_writemask) |
((int)func << 1) |
(offset_scale << 16) |
- (offset_units << 24) |
- 0x30; /* find out what is this */
+ (offset_units << 24);
}
static void
@@ -647,20 +620,22 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
struct pipe_rasterizer_state *rst = &ctx->rasterizer->base;
render->depth_test = lima_calculate_depth_test(&ctx->zsa->base, rst);
- ushort far, near;
+ if (!rst->depth_clip_near || ctx->viewport.near == 0.0f)
+ render->depth_test |= 0x10; /* don't clip depth near */
+ if (!rst->depth_clip_far || ctx->viewport.far == 1.0f)
+ render->depth_test |= 0x20; /* don't clip depth far */
+
+ if (fs->state.frag_depth_reg != -1) {
+ render->depth_test |= (fs->state.frag_depth_reg << 6);
+ /* Shader writes depth */
+ render->depth_test |= 0x801;
+ }
+
+ uint16_t far, near;
near = float_to_ushort(ctx->viewport.near);
far = float_to_ushort(ctx->viewport.far);
- /* Insert a small 'epsilon' difference between 'near' and 'far' when
- * they are equal, to avoid application bugs. */
- if (far == near) {
- if (near > 0)
- near--;
- if (far < USHRT_MAX)
- far++;
- }
-
/* overlap with plbu? any place can remove one? */
render->depth_range = near | (far << 16);
@@ -699,14 +674,25 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
}
/* need more investigation */
- if (info->mode == PIPE_PRIM_POINTS)
- render->multi_sample = 0x0000F000;
- else if (info->mode < PIPE_PRIM_TRIANGLES)
- render->multi_sample = 0x0000F400;
+ if (info->mode == MESA_PRIM_POINTS)
+ render->multi_sample = 0x00000000;
+ else if (info->mode < MESA_PRIM_TRIANGLES)
+ render->multi_sample = 0x00000400;
else
- render->multi_sample = 0x0000F800;
+ render->multi_sample = 0x00000800;
if (ctx->framebuffer.base.samples)
render->multi_sample |= 0x68;
+ if (ctx->blend->base.alpha_to_coverage)
+ render->multi_sample |= (1 << 7);
+ if (ctx->blend->base.alpha_to_one)
+ render->multi_sample |= (1 << 8);
+ render->multi_sample |= (ctx->sample_mask << 12);
+
+ /* Set gl_FragColor register, need to specify it 4 times */
+ render->multi_sample |= (fs->state.frag_color0_reg << 28) |
+ (fs->state.frag_color0_reg << 24) |
+ (fs->state.frag_color0_reg << 20) |
+ (fs->state.frag_color0_reg << 16);
/* alpha test */
if (ctx->zsa->base.alpha_enabled) {
@@ -726,12 +712,17 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
render->textures_address = 0x00000000;
render->aux0 = (ctx->vs->state.varying_stride >> 3);
- render->aux1 = 0x00001000;
+ render->aux1 = 0x00000000;
+ if (ctx->rasterizer->base.front_ccw)
+ render->aux1 = 0x00001000;
+
if (ctx->blend->base.dither)
render->aux1 |= 0x00002000;
if (fs->state.uses_discard ||
- ctx->zsa->base.alpha_enabled) {
+ ctx->zsa->base.alpha_enabled ||
+ fs->state.frag_depth_reg != -1 ||
+ ctx->blend->base.alpha_to_coverage) {
early_z = false;
pixel_kill = false;
}
@@ -770,6 +761,10 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
render->aux1 |= 0x10000;
}
+ /* Set secondary output color */
+ if (fs->state.frag_color1_reg != -1)
+ render->aux0 |= (fs->state.frag_color1_reg << 28);
+
if (ctx->vs->state.num_varyings) {
render->varying_types = 0x00000000;
render->varyings_address = ctx->gp_output->va +
@@ -842,8 +837,8 @@ lima_update_gp_attribute_info(struct lima_context *ctx, const struct pipe_draw_i
unsigned start = info->index_size ? (ctx->min_index + draw->index_bias) : draw->start;
attribute[n++] = res->bo->va + pvb->buffer_offset + pve->src_offset
- + start * pvb->stride;
- attribute[n++] = (pvb->stride << 11) |
+ + start * pve->src_stride;
+ attribute[n++] = (pve->src_stride << 11) |
(lima_pipe_format_to_attrib_type(pve->src_format) << 2) |
(util_format_get_nr_components(pve->src_format) - 1);
}
@@ -1173,6 +1168,10 @@ lima_draw_vbo(struct pipe_context *pctx,
if (lima_is_scissor_zero(ctx))
return;
+ /* extend the viewport in case of line draws with a line_width > 1.0f,
+ * otherwise use the original values */
+ lima_extend_viewport(ctx, info);
+
if (!lima_update_fs_state(ctx) || !lima_update_vs_state(ctx))
return;
@@ -1182,10 +1181,12 @@ lima_draw_vbo(struct pipe_context *pctx,
lima_dump_command_stream_print(
job->dump, ctx->vs->bo->map, ctx->vs->state.shader_size, false,
"add vs at va %x\n", ctx->vs->bo->va);
+ lima_dump_shader(job->dump, ctx->vs->bo->map, ctx->vs->state.shader_size, false);
lima_dump_command_stream_print(
job->dump, ctx->fs->bo->map, ctx->fs->state.shader_size, false,
"add fs at va %x\n", ctx->fs->bo->va);
+ lima_dump_shader(job->dump, ctx->fs->bo->map, ctx->fs->state.shader_size, true);
lima_job_add_bo(job, LIMA_PIPE_GP, ctx->vs->bo, LIMA_SUBMIT_BO_READ);
lima_job_add_bo(job, LIMA_PIPE_PP, ctx->fs->bo, LIMA_SUBMIT_BO_READ);
@@ -1201,7 +1202,6 @@ lima_draw_vbo(struct pipe_context *pctx,
if (job->draws > MAX_DRAWS_PER_JOB) {
unsigned resolve = job->resolve;
lima_do_job(job);
- job = lima_job_get(ctx);
/* Subsequent job will need to resolve the same buffers */
lima_update_job_wb(ctx, resolve);
}
diff --git a/src/gallium/drivers/lima/lima_format.h b/src/gallium/drivers/lima/lima_format.h
index fb472641d6e..781f77e38e9 100644
--- a/src/gallium/drivers/lima/lima_format.h
+++ b/src/gallium/drivers/lima/lima_format.h
@@ -26,7 +26,7 @@
#include <stdbool.h>
-#include <pipe/p_format.h>
+#include <util/format/u_formats.h>
bool lima_format_texel_supported(enum pipe_format f);
bool lima_format_pixel_supported(enum pipe_format f);
diff --git a/src/gallium/drivers/lima/lima_gpu.h b/src/gallium/drivers/lima/lima_gpu.h
index 4d35640592f..4e3ea65d026 100644
--- a/src/gallium/drivers/lima/lima_gpu.h
+++ b/src/gallium/drivers/lima/lima_gpu.h
@@ -73,9 +73,9 @@ struct lima_pp_wb_reg {
uint32_t downsample_factor;
uint32_t pixel_layout;
uint32_t pitch;
+ uint32_t flags;
uint32_t mrt_bits;
uint32_t mrt_pitch;
- uint32_t zero;
uint32_t unused0;
uint32_t unused1;
uint32_t unused2;
@@ -120,11 +120,15 @@ struct lima_render_state {
plbu_cmd[i++] = v2; \
} while (0)
+#define PLBU_BLOCK_W_MASK 0xff
+#define PLBU_BLOCK_H_MASK 0xff
+
#define PLBU_CMD_BLOCK_STEP(shift_min, shift_h, shift_w) \
PLBU_CMD(((shift_min) << 28) | ((shift_h) << 16) | (shift_w), 0x1000010C)
#define PLBU_CMD_TILED_DIMENSIONS(tiled_w, tiled_h) \
PLBU_CMD((((tiled_w) - 1) << 24) | (((tiled_h) - 1) << 8), 0x10000109)
-#define PLBU_CMD_BLOCK_STRIDE(block_w) PLBU_CMD((block_w) & 0xff, 0x30000000)
+#define PLBU_CMD_BLOCK_STRIDE(block_w) \
+ PLBU_CMD((block_w) & PLBU_BLOCK_W_MASK, 0x30000000)
#define PLBU_CMD_ARRAY_ADDRESS(gp_stream, block_num) \
PLBU_CMD(gp_stream, 0x28000000 | ((block_num) - 1) | 1)
#define PLBU_CMD_VIEWPORT_LEFT(v) PLBU_CMD(v, 0x10000107)
diff --git a/src/gallium/drivers/lima/lima_job.c b/src/gallium/drivers/lima/lima_job.c
index ef8a6444cb9..6400fdb2dd9 100644
--- a/src/gallium/drivers/lima/lima_job.c
+++ b/src/gallium/drivers/lima/lima_job.c
@@ -34,6 +34,7 @@
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
#include "lima_screen.h"
#include "lima_context.h"
@@ -45,6 +46,7 @@
#include "lima_texture.h"
#include "lima_fence.h"
#include "lima_gpu.h"
+#include "lima_blit.h"
#define VOID2U64(x) ((uint64_t)(unsigned long)(x))
@@ -53,9 +55,19 @@ lima_get_fb_info(struct lima_job *job)
{
struct lima_context *ctx = job->ctx;
struct lima_job_fb_info *fb = &job->fb;
+ struct lima_surface *surf = lima_surface(job->key.cbuf);
+
+ if (!surf)
+ surf = lima_surface(job->key.zsbuf);
- fb->width = ctx->framebuffer.base.width;
- fb->height = ctx->framebuffer.base.height;
+ if (!surf) {
+ /* We have neither cbuf nor zsbuf, use dimensions from ctx */
+ fb->width = ctx->framebuffer.base.width;
+ fb->height = ctx->framebuffer.base.height;
+ } else {
+ fb->width = surf->base.width;
+ fb->height = surf->base.height;
+ }
int width = align(fb->width, 16) >> 4;
int height = align(fb->height, 16) >> 4;
@@ -69,8 +81,9 @@ lima_get_fb_info(struct lima_job *job)
fb->shift_w = 0;
int limit = screen->plb_max_blk;
- while ((width * height) > limit) {
- if (width >= height) {
+ while ((width * height) > limit ||
+ width > PLBU_BLOCK_W_MASK || height > PLBU_BLOCK_H_MASK) {
+ if (width >= height || width > PLBU_BLOCK_W_MASK) {
width = (width + 1) >> 1;
fb->shift_w++;
} else {
@@ -86,7 +99,9 @@ lima_get_fb_info(struct lima_job *job)
}
static struct lima_job *
-lima_job_create(struct lima_context *ctx)
+lima_job_create(struct lima_context *ctx,
+ struct pipe_surface *cbuf,
+ struct pipe_surface *zsbuf)
{
struct lima_job *s;
@@ -112,9 +127,8 @@ lima_job_create(struct lima_context *ctx)
util_dynarray_init(&s->plbu_cmd_array, s);
util_dynarray_init(&s->plbu_cmd_head, s);
- struct lima_context_framebuffer *fb = &ctx->framebuffer;
- pipe_surface_reference(&s->key.cbuf, fb->base.cbufs[0]);
- pipe_surface_reference(&s->key.zsbuf, fb->base.zsbuf);
+ pipe_surface_reference(&s->key.cbuf, cbuf);
+ pipe_surface_reference(&s->key.zsbuf, zsbuf);
lima_get_fb_info(s);
@@ -145,20 +159,21 @@ lima_job_free(struct lima_job *job)
ralloc_free(job);
}
-static struct lima_job *
-_lima_job_get(struct lima_context *ctx)
+struct lima_job *
+lima_job_get_with_fb(struct lima_context *ctx,
+ struct pipe_surface *cbuf,
+ struct pipe_surface *zsbuf)
{
- struct lima_context_framebuffer *fb = &ctx->framebuffer;
struct lima_job_key local_key = {
- .cbuf = fb->base.cbufs[0],
- .zsbuf = fb->base.zsbuf,
+ .cbuf = cbuf,
+ .zsbuf = zsbuf,
};
struct hash_entry *entry = _mesa_hash_table_search(ctx->jobs, &local_key);
if (entry)
return entry->data;
- struct lima_job *job = lima_job_create(ctx);
+ struct lima_job *job = lima_job_create(ctx, cbuf, zsbuf);
if (!job)
return NULL;
@@ -167,6 +182,14 @@ _lima_job_get(struct lima_context *ctx)
return job;
}
+/*
+ * Look up (or create) the job for the context's currently bound
+ * framebuffer: first color buffer plus depth/stencil buffer.
+ */
+static struct lima_job *
+_lima_job_get(struct lima_context *ctx)
+{
+   return lima_job_get_with_fb(ctx,
+                               ctx->framebuffer.base.cbufs[0],
+                               ctx->framebuffer.base.zsbuf);
+}
+
/*
* Note: this function can only be called in draw code path,
* must not exist in flush code path.
@@ -337,112 +360,35 @@ lima_fb_zsbuf_needs_reload(struct lima_job *job)
static void
lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
{
- #define lima_reload_render_state_offset 0x0000
- #define lima_reload_gl_pos_offset 0x0040
- #define lima_reload_varying_offset 0x0080
- #define lima_reload_tex_desc_offset 0x00c0
- #define lima_reload_tex_array_offset 0x0100
- #define lima_reload_buffer_size 0x0140
-
+ struct lima_job_fb_info *fb = &job->fb;
struct lima_context *ctx = job->ctx;
- struct lima_surface *surf = lima_surface(psurf);
- int level = psurf->u.tex.level;
- unsigned first_layer = psurf->u.tex.first_layer;
-
- uint32_t va;
- void *cpu = lima_job_create_stream_bo(
- job, LIMA_PIPE_PP, lima_reload_buffer_size, &va);
-
- struct lima_screen *screen = lima_screen(ctx->base.screen);
+ struct pipe_box src = {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
+ };
- uint32_t reload_shader_first_instr_size =
- ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
- uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
-
- struct lima_render_state reload_render_state = {
- .alpha_blend = 0xf03b1ad2,
- .depth_test = 0x0000000e,
- .depth_range = 0xffff0000,
- .stencil_front = 0x00000007,
- .stencil_back = 0x00000007,
- .multi_sample = 0x0000f007,
- .shader_address = reload_shader_va | reload_shader_first_instr_size,
- .varying_types = 0x00000001,
- .textures_address = va + lima_reload_tex_array_offset,
- .aux0 = 0x00004021,
- .varyings_address = va + lima_reload_varying_offset,
+ struct pipe_box dst = {
+ .x = 0,
+ .y = 0,
+ .width = fb->width,
+ .height = fb->height,
};
- if (util_format_is_depth_or_stencil(psurf->format)) {
- reload_render_state.alpha_blend &= 0x0fffffff;
- if (psurf->format != PIPE_FORMAT_Z16_UNORM)
- reload_render_state.depth_test |= 0x400;
- if (surf->reload & PIPE_CLEAR_DEPTH)
- reload_render_state.depth_test |= 0x801;
- if (surf->reload & PIPE_CLEAR_STENCIL) {
- reload_render_state.depth_test |= 0x1000;
- reload_render_state.stencil_front = 0x0000024f;
- reload_render_state.stencil_back = 0x0000024f;
- reload_render_state.stencil_test = 0x0000ffff;
+ if (ctx->framebuffer.base.samples > 1) {
+ for (int i = 0; i < LIMA_MAX_SAMPLES; i++) {
+ lima_pack_blit_cmd(job, &job->plbu_cmd_head,
+ psurf, &src, &dst,
+ PIPE_TEX_FILTER_NEAREST, false,
+ (1 << i), i);
}
+ } else {
+ lima_pack_blit_cmd(job, &job->plbu_cmd_head,
+ psurf, &src, &dst,
+ PIPE_TEX_FILTER_NEAREST, false,
+ 0xf, 0);
}
-
- memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
- sizeof(reload_render_state));
-
- lima_tex_desc *td = cpu + lima_reload_tex_desc_offset;
- memset(td, 0, lima_min_tex_desc_size);
- lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, first_layer);
- td->format = lima_format_get_texel_reload(psurf->format);
- td->unnorm_coords = 1;
- td->texture_type = LIMA_TEXTURE_TYPE_2D;
- td->min_img_filter_nearest = 1;
- td->mag_img_filter_nearest = 1;
- td->wrap_s_clamp_to_edge = 1;
- td->wrap_t_clamp_to_edge = 1;
- td->unknown_2_2 = 0x1;
-
- uint32_t *ta = cpu + lima_reload_tex_array_offset;
- ta[0] = va + lima_reload_tex_desc_offset;
-
- struct lima_job_fb_info *fb = &job->fb;
- float reload_gl_pos[] = {
- fb->width, 0, 0, 1,
- 0, 0, 0, 1,
- 0, fb->height, 0, 1,
- };
- memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
- sizeof(reload_gl_pos));
-
- float reload_varying[] = {
- fb->width, 0, 0, 0,
- 0, fb->height, 0, 0,
- };
- memcpy(cpu + lima_reload_varying_offset, reload_varying,
- sizeof(reload_varying));
-
- PLBU_CMD_BEGIN(&job->plbu_cmd_head, 20);
-
- PLBU_CMD_VIEWPORT_LEFT(0);
- PLBU_CMD_VIEWPORT_RIGHT(fui(fb->width));
- PLBU_CMD_VIEWPORT_BOTTOM(0);
- PLBU_CMD_VIEWPORT_TOP(fui(fb->height));
-
- PLBU_CMD_RSW_VERTEX_ARRAY(
- va + lima_reload_render_state_offset,
- va + lima_reload_gl_pos_offset);
-
- PLBU_CMD_UNKNOWN2();
- PLBU_CMD_UNKNOWN1();
-
- PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
- PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
- PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
-
- PLBU_CMD_END();
-
- lima_dump_command_stream_print(job->dump, cpu, lima_reload_buffer_size,
- false, "reload plbu cmd at va %x\n", va);
}
static void
@@ -453,6 +399,9 @@ lima_pack_head_plbu_cmd(struct lima_job *job)
PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10);
+ assert((fb->block_w & PLBU_BLOCK_W_MASK) == fb->block_w);
+ assert((fb->block_h & PLBU_BLOCK_H_MASK) == fb->block_h);
+
PLBU_CMD_UNKNOWN2();
PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
@@ -464,8 +413,9 @@ lima_pack_head_plbu_cmd(struct lima_job *job)
PLBU_CMD_END();
- if (lima_fb_cbuf_needs_reload(job))
+ if (lima_fb_cbuf_needs_reload(job)) {
lima_pack_reload_plbu_cmd(job, job->key.cbuf);
+ }
if (lima_fb_zsbuf_needs_reload(job))
lima_pack_reload_plbu_cmd(job, job->key.zsbuf);
@@ -543,7 +493,8 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
struct lima_pp_stream_state *ps = &ctx->pp_stream;
struct lima_job_fb_info *fb = &job->fb;
struct lima_screen *screen = lima_screen(ctx->base.screen);
- int i, num_pp = screen->num_pp;
+ int num_pp = screen->num_pp;
+ assert(num_pp > 0);
/* use hilbert_coords to generates 1D to 2D relationship.
* 1D for pp stream index and 2D for plb block x/y on framebuffer.
@@ -565,10 +516,10 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
count = 1 << (dim + dim);
}
- for (i = 0; i < num_pp; i++)
+ for (int i = 0; i < num_pp; i++)
stream[i] = ps->map + ps->offset[i];
- for (i = 0; i < count; i++) {
+ for (int i = 0; i < count; i++) {
int x, y;
hilbert_coords(max, i, &x, &y);
if (x < tiled_w && y < tiled_h) {
@@ -589,7 +540,7 @@ lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
}
}
- for (i = 0; i < num_pp; i++) {
+ for (int i = 0; i < num_pp; i++) {
stream[i][si[i]++] = 0;
stream[i][si[i]++] = 0xBC000000;
stream[i][si[i]++] = 0;
@@ -800,7 +751,13 @@ lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx)
wb[wb_idx].pixel_layout = 0x0;
wb[wb_idx].pitch = res->levels[level].stride / 8;
}
- wb[wb_idx].mrt_bits = 0;
+ wb[wb_idx].flags = 0;
+ unsigned nr_samples = zsbuf->nr_samples ?
+ zsbuf->nr_samples : MAX2(1, zsbuf->texture->nr_samples);
+ if (nr_samples > 1) {
+ wb[wb_idx].mrt_pitch = res->mrt_pitch;
+ wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples);
+ }
}
static void
@@ -829,7 +786,13 @@ lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg,
wb[wb_idx].pixel_layout = 0x0;
wb[wb_idx].pitch = res->levels[level].stride / 8;
}
- wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
+ wb[wb_idx].flags = swap_channels ? 0x4 : 0x0;
+ unsigned nr_samples = cbuf->nr_samples ?
+ cbuf->nr_samples : MAX2(1, cbuf->texture->nr_samples);
+ if (nr_samples > 1) {
+ wb[wb_idx].mrt_pitch = res->mrt_pitch;
+ wb[wb_idx].mrt_bits = u_bit_consecutive(0, nr_samples);
+ }
}
static void
@@ -949,7 +912,7 @@ lima_do_job(struct lima_job *job)
fprintf(stderr, "gp job error\n");
if (job->dump) {
- if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) {
+ if (lima_job_wait(job, LIMA_PIPE_GP, OS_TIMEOUT_INFINITE)) {
if (ctx->gp_output) {
float *pos = lima_bo_map(ctx->gp_output);
lima_dump_command_stream_print(
@@ -1030,7 +993,7 @@ lima_do_job(struct lima_job *job)
}
if (job->dump) {
- if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) {
+ if (!lima_job_wait(job, LIMA_PIPE_PP, OS_TIMEOUT_INFINITE)) {
fprintf(stderr, "pp wait error\n");
exit(1);
}
@@ -1111,6 +1074,14 @@ lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
}
}
+/* texture_barrier hook, installed on ctx->base in lima_job_init().
+ * The flags argument is not inspected: every request calls lima_flush().
+ */
+static void
+lima_texture_barrier(struct pipe_context *pctx, unsigned flags)
+{
+   lima_flush(lima_context(pctx));
+}
+
static bool
lima_job_compare(const void *s1, const void *s2)
{
@@ -1145,6 +1116,7 @@ bool lima_job_init(struct lima_context *ctx)
}
ctx->base.flush = lima_pipe_flush;
+ ctx->base.texture_barrier = lima_texture_barrier;
return true;
}
diff --git a/src/gallium/drivers/lima/lima_job.h b/src/gallium/drivers/lima/lima_job.h
index a43b8be1c10..0eb05a5378c 100644
--- a/src/gallium/drivers/lima/lima_job.h
+++ b/src/gallium/drivers/lima/lima_job.h
@@ -95,6 +95,9 @@ lima_job_has_draw_pending(struct lima_job *job)
}
struct lima_job *lima_job_get(struct lima_context *ctx);
+struct lima_job * lima_job_get_with_fb(struct lima_context *ctx,
+ struct pipe_surface *cbuf,
+ struct pipe_surface *zsbuf);
bool lima_job_add_bo(struct lima_job *job, int pipe,
struct lima_bo *bo, uint32_t flags);
diff --git a/src/gallium/drivers/lima/lima_parser.c b/src/gallium/drivers/lima/lima_parser.c
index b0a6c86a9e5..da8379975da 100644
--- a/src/gallium/drivers/lima/lima_parser.c
+++ b/src/gallium/drivers/lima/lima_parser.c
@@ -32,6 +32,9 @@
#include "lima_parser.h"
#include "lima_texture.h"
+#include "lima/ir/gp/codegen.h"
+#include "lima/ir/pp/codegen.h"
+
typedef struct {
char *info;
} render_state_info;
@@ -433,6 +436,35 @@ lima_parse_plbu(FILE *fp, uint32_t *data, int size, uint32_t start)
fprintf(fp, "\n");
}
+/**
+ * Write a disassembly of a shader binary to fp, framed by BEGIN/END banners.
+ *
+ * fp:      output stream
+ * data:    shader binary
+ * size:    size of the binary in bytes; only the vertex path reads it, to
+ *          derive the number of gpir_codegen_instr entries
+ * is_frag: true to decode as a fragment shader (ppir), false as a vertex
+ *          shader (gpir)
+ */
+void
+lima_parse_shader(FILE *fp, uint32_t *data, int size, bool is_frag)
+{
+   uint32_t *value = &data[0];
+
+   if (is_frag) {
+      uint32_t *bin = value;
+      uint32_t offt = 0;
+      uint32_t next_instr_length = 0;
+
+      fprintf(fp, "/* ============ FS DISASSEMBLY BEGIN ============== */\n");
+
+      /* Walk the instruction chain: each control word gives the length of
+       * its own instruction (count, in 32-bit words) and of the following
+       * one (next_count); a next_count of zero ends the program.
+       */
+      do {
+         ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *)bin;
+         fprintf(fp, "@%6u: ", offt);
+         ppir_disassemble_instr(bin, offt, fp);
+
+         /* A zero-length instruction would leave bin where it is and make
+          * this loop spin forever on the same control word, so stop.
+          */
+         uint32_t count = ctrl->count;
+         if (!count)
+            break;
+
+         bin += count;
+         offt += count;
+         next_instr_length = ctrl->next_count;
+      } while (next_instr_length);
+
+      fprintf(fp, "/* ============ FS DISASSEMBLY END ================= */\n");
+   } else {
+      fprintf(fp, "/* ============ VS DISASSEMBLY BEGIN ============== */\n");
+      gpir_disassemble_program((gpir_codegen_instr *)value, size / sizeof(gpir_codegen_instr), fp);
+      fprintf(fp, "/* ============ VS DISASSEMBLY END ================= */\n");
+   }
+}
+
static void
parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
{
@@ -489,7 +521,11 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
if (*value & 0x1000)
fprintf(fp, ", shader writes stencil");
fprintf(fp, " */\n\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
- fprintf(fp, ": unknown bits 4-9: 0x%08x", *value & 0x000003f0);
+ if ((*value & 0x00000010) == 0x00000010)
+ fprintf(fp, ": ignore depth clip near");
+ if ((*value & 0x00000020) == 0x00000020)
+ fprintf(fp, ", ignore depth clip far");
+ fprintf(fp, ", register for gl_FragDepth: $%d", (*value & 0x000003c0) >> 6);
fprintf(fp, ", unknown bits 13-15: 0x%08x */\n", *value & 0x00000e000);
break;
case 4: /* DEPTH RANGE */
@@ -553,12 +589,27 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
fprintf(fp, ": unknown");
if ((*value & 0x00000078) == 0x00000068)
- fprintf(fp, ", fb_samples */\n");
+ fprintf(fp, ", msaa */\n");
else if ((*value & 0x00000078) == 0x00000000)
fprintf(fp, " */\n");
else
- fprintf(fp, ", UNKNOWN\n");
- fprintf(fp, "\t\t\t\t\t\t/* %s(2)", render_state_infos[i].info);
+ fprintf(fp, ", UNKNOWN */\n");
+
+ fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
+ /* Plain %x: "%.x" means precision 0, which prints no digits at all for a
+ * zero sample mask and would leave a bare "0x" in the dump.
+ */
+ fprintf(fp, ": sample_mask: 0x%x", ((*value & 0xf000) >> 12));
+ if ((*value & (1 << 7)))
+ fprintf(fp, ", alpha_to_coverage");
+ if ((*value & (1 << 8)))
+ fprintf(fp, ", alpha_to_one");
+ fprintf(fp, " */\n");
+
+ fprintf(fp, "\t\t\t\t\t\t/* %s(4)", render_state_infos[i].info);
+ fprintf(fp, ", register for gl_FragColor: $%d $%d $%d $%d */\n",
+ (*value & 0xf0000000) >> 28,
+ (*value & 0x0f000000) >> 24,
+ (*value & 0x00f00000) >> 20,
+ (*value & 0x000f0000) >> 16);
+ fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
fprintf(fp, ": alpha_test_func: %d (%s) */\n",
(*value & 0x00000007),
lima_get_compare_func_string((*value & 0x00000007))); /* alpha_test_func */
@@ -631,12 +682,23 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
if ((*value & 0x00002000) == 0x00002000) /* bit 13 unknown */
fprintf(fp, ", bit 13 set");
+
+ fprintf(fp, " */\n");
+ fprintf(fp, "\n\t\t\t\t\t\t/* %s(3):", render_state_infos[i].info);
+ fprintf(fp, " register for gl_SecondaryFragColor: $%d",
+ (*value & 0xf0000000) >> 28);
fprintf(fp, " */\n");
break;
case 14: /* AUX1 */
fprintf(fp, ": ");
if ((*value & 0x00002000) == 0x00002000)
fprintf(fp, "blend->base.dither true, ");
+
+ if ((*value & 0x00001000) == 0x00001000)
+ fprintf(fp, "glFrontFace(GL_CCW), ");
+ else
+ fprintf(fp, "glFrontFace(GL_CW), ");
+
if ((*value & 0x00010000) == 0x00010000)
fprintf(fp, "ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer true ");
fprintf(fp, "*/\n");
@@ -686,14 +748,16 @@ parse_texture(FILE *fp, uint32_t *data, uint32_t start, uint32_t offset)
fprintf(fp, "\t stride: 0x%x (%d)\n", desc->stride, desc->stride);
fprintf(fp, "\t unknown_0_2: 0x%x (%d)\n", desc->unknown_0_2, desc->unknown_0_2);
- /* Word 1 - 3 */
- fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x 0x%08x 0x%08x\n",
- start + i * 4, i * 4, *(&data[i + offset]), *(&data[i + 1 + offset]), *(&data[i + 2 + offset]));
- i += 3;
+ /* Word 1 - 5 */
+ fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ start + i * 4, i * 4, *(&data[i + offset]), *(&data[i + 1 + offset]),
+ *(&data[i + 2 + offset]), *(&data[i + 3 + offset]), *(&data[i + 4 + offset]));
+ i += 5;
fprintf(fp, "\t unknown_1_1: 0x%x (%d)\n", desc->unknown_1_1, desc->unknown_1_1);
fprintf(fp, "\t unnorm_coords: 0x%x (%d)\n", desc->unnorm_coords, desc->unnorm_coords);
fprintf(fp, "\t unknown_1_2: 0x%x (%d)\n", desc->unknown_1_2, desc->unknown_1_2);
- fprintf(fp, "\t texture_type: 0x%x (%d)\n", desc->texture_type, desc->texture_type);
+ fprintf(fp, "\t cube_map: 0x%x (%d)\n", desc->cube_map, desc->cube_map);
+ fprintf(fp, "\t sampler_dim: 0x%x (%d)\n", desc->sampler_dim, desc->sampler_dim);
fprintf(fp, "\t min_lod: 0x%x (%d) (%f)\n", desc->min_lod, desc->min_lod, lima_fixed8_to_float(desc->min_lod));
fprintf(fp, "\t max_lod: 0x%x (%d) (%f)\n", desc->max_lod, desc->max_lod, lima_fixed8_to_float(desc->max_lod));
fprintf(fp, "\t lod_bias: 0x%x (%d) (%f)\n", desc->lod_bias, desc->lod_bias, lima_fixed8_to_float(desc->lod_bias));
@@ -702,29 +766,20 @@ parse_texture(FILE *fp, uint32_t *data, uint32_t start, uint32_t offset)
fprintf(fp, "\t min_mipfilter_2: 0x%x (%d)\n", desc->min_mipfilter_2, desc->min_mipfilter_2);
fprintf(fp, "\t min_img_filter_nearest: 0x%x (%d)\n", desc->min_img_filter_nearest, desc->min_img_filter_nearest);
fprintf(fp, "\t mag_img_filter_nearest: 0x%x (%d)\n", desc->mag_img_filter_nearest, desc->mag_img_filter_nearest);
- fprintf(fp, "\t wrap_s_clamp_to_edge: 0x%x (%d)\n", desc->wrap_s_clamp_to_edge, desc->wrap_s_clamp_to_edge);
- fprintf(fp, "\t wrap_s_clamp: 0x%x (%d)\n", desc->wrap_s_clamp, desc->wrap_s_clamp);
- fprintf(fp, "\t wrap_s_mirror_repeat: 0x%x (%d)\n", desc->wrap_s_mirror_repeat, desc->wrap_s_mirror_repeat);
- fprintf(fp, "\t wrap_t_clamp_to_edge: 0x%x (%d)\n", desc->wrap_t_clamp_to_edge, desc->wrap_t_clamp_to_edge);
- fprintf(fp, "\t wrap_t_clamp: 0x%x (%d)\n", desc->wrap_t_clamp, desc->wrap_t_clamp);
- fprintf(fp, "\t wrap_t_mirror_repeat: 0x%x (%d)\n", desc->wrap_t_mirror_repeat, desc->wrap_t_mirror_repeat);
- fprintf(fp, "\t unknown_2_2: 0x%x (%d)\n", desc->unknown_2_2, desc->unknown_2_2);
+ fprintf(fp, "\t wrap_s: %d (%s)\n", desc->wrap_s,
+ lima_get_wrap_mode_string(desc->wrap_s));
+ fprintf(fp, "\t wrap_t: %d (%s)\n", desc->wrap_t,
+ lima_get_wrap_mode_string(desc->wrap_t));
+ fprintf(fp, "\t wrap_r: %d (%s)\n", desc->wrap_r,
+ lima_get_wrap_mode_string(desc->wrap_r));
fprintf(fp, "\t width: 0x%x (%d)\n", desc->width, desc->width);
fprintf(fp, "\t height: 0x%x (%d)\n", desc->height, desc->height);
- fprintf(fp, "\t unknown_3_1: 0x%x (%d)\n", desc->unknown_3_1, desc->unknown_3_1);
- fprintf(fp, "\t unknown_3_2: 0x%x (%d)\n", desc->unknown_3_2, desc->unknown_3_2);
-
- /* Word 4 */
- fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x\n",
- start + i * 4, i * 4, *(&data[i + offset]));
- i++;
- fprintf(fp, "\t unknown_4: 0x%x (%d)\n", desc->unknown_4, desc->unknown_4);
-
- /* Word 5 */
- fprintf(fp, "/* 0x%08x (0x%08x) */\t0x%08x\n",
- start + i * 4, i * 4, *(&data[i + offset]));
- i++;
- fprintf(fp, "\t unknown_5: 0x%x (%d)\n", desc->unknown_5, desc->unknown_5);
+ fprintf(fp, "\t depth: 0x%x (%d)\n", desc->depth, desc->depth);
+ fprintf(fp, "\t border_red: 0x%x (%d)\n", desc->border_red, desc->border_red);
+ fprintf(fp, "\t border_green: 0x%x (%d)\n", desc->border_green, desc->border_green);
+ fprintf(fp, "\t border_blue: 0x%x (%d)\n", desc->border_blue, desc->border_blue);
+ fprintf(fp, "\t border_alpha: 0x%x (%d)\n", desc->border_alpha, desc->border_alpha);
+ fprintf(fp, "\t unknown_5_1: 0x%x (%d)\n", desc->unknown_5_1, desc->unknown_5_1);
/* Word 6 - */
fprintf(fp, "/* 0x%08x (0x%08x) */",
diff --git a/src/gallium/drivers/lima/lima_parser.h b/src/gallium/drivers/lima/lima_parser.h
index 2378cfc02db..eed7926fb5e 100644
--- a/src/gallium/drivers/lima/lima_parser.h
+++ b/src/gallium/drivers/lima/lima_parser.h
@@ -61,7 +61,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = {
"CONST_COLOR", /* 2 */
"ZERO", /* 3 */
"UNKNOWN_4", /* 4 */
- "UNKNOWN_5", /* 5 */
+ "SRC2_COLOR", /* 5 */
"UNKNOWN_6", /* 6 */
"SRC_ALPHA_SAT", /* 7 */
"INV_SRC_COLOR", /* 8 */
@@ -69,7 +69,7 @@ static const char *PIPE_BLENDFACTOR_STRING[] = {
"INV_CONST_COLOR", /* 10 */
"ONE", /* 11 */
"UNKNOWN_12", /* 12 */
- "UNKNOWN_13", /* 13 */
+ "INV_SRC2_COLOR", /* 13 */
"UNKNOWN_14", /* 14 */
"UNKNOWN_15", /* 15 */
"SRC_ALPHA", /* 16 */
@@ -77,13 +77,26 @@ static const char *PIPE_BLENDFACTOR_STRING[] = {
"CONST_ALPHA", /* 18 */
"UNKNOWN_19", /* 19 */
"UNKNOWN_20", /* 20 */
- "UNKNOWN_21", /* 21 */
+ "SRC2_ALPHA", /* 21 */
"UNKNOWN_22", /* 22 */
"UNKNOWN_23", /* 23 */
"INV_SRC_ALPHA", /* 24 */
"INV_DST_ALPHA", /* 25 */
"INV_CONST_ALPHA", /* 26 */
+ "UNKNOWN_27", /* 27 */
+ "UNKNOWN_28", /* 28 */
+ "INV_SRC2_ALPHA", /* 29 */
+};
+/* Names for texture wrap mode values 0-7, indexed by the mode value;
+ * looked up through lima_get_wrap_mode_string().
+ */
+static const char *LIMA_WRAP_MODE_STRING[] = {
+ "TEX_WRAP_REPEAT", /* 0 */
+ "TEX_WRAP_CLAMP_TO_EDGE", /* 1 */
+ "TEX_WRAP_CLAMP", /* 2 */
+ "TEX_WRAP_CLAMP_TO_BORDER", /* 3 */
+ "TEX_WRAP_MIRROR_REPEAT", /* 4 */
+ "TEX_WRAP_MIRROR_CLAMP_TO_EDGE", /* 5 */
+ "TEX_WRAP_MIRROR_CLAMP", /* 6 */
+ "TEX_WRAP_MIRROR_CLAMP_TO_BORDER", /* 7 */
 };
static inline const char
@@ -118,6 +131,15 @@ static inline const char
return "UNKNOWN";
}
+/* Return the name of a texture wrap mode value, or "UNKNOWN" when the value
+ * falls outside the 0-7 range covered by LIMA_WRAP_MODE_STRING.
+ */
+static inline const char
+*lima_get_wrap_mode_string(int mode) {
+   if (mode < 0 || mode > 7)
+      return "UNKNOWN";
+
+   return LIMA_WRAP_MODE_STRING[mode];
+}
+
+void lima_parse_shader(FILE *fp, uint32_t *data, int size, bool is_frag);
void lima_parse_vs(FILE *fp, uint32_t *data, int size, uint32_t start);
void lima_parse_plbu(FILE *fp, uint32_t *data, int size, uint32_t start);
void lima_parse_render_state(FILE *fp, uint32_t *data, int size, uint32_t start);
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index a4d9b8a0ef9..0ef3ebbfc98 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -30,6 +30,7 @@
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "nir/tgsi_to_nir.h"
+#include "nir_legacy.h"
#include "pipe/p_state.h"
@@ -56,12 +57,14 @@ static const nir_shader_compiler_options vs_nir_options = {
/* could be implemented by clamp */
.lower_fsat = true,
.lower_bitops = true,
- .lower_rotate = true,
.lower_sincos = true,
.lower_fceil = true,
.lower_insert_byte = true,
.lower_insert_word = true,
- .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
+ .force_indirect_unrolling = nir_var_all,
+ .force_indirect_unrolling_sampler = true,
+ .lower_varying_from_uniform = true,
+ .max_unroll_iterations = 32,
};
static const nir_shader_compiler_options fs_nir_options = {
@@ -74,14 +77,16 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsign = true,
- .lower_rotate = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitops = true,
.lower_vector_cmp = true,
- .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
+ .force_indirect_unrolling = (nir_var_shader_out | nir_var_function_temp),
+ .force_indirect_unrolling_sampler = true,
+ .lower_varying_from_uniform = true,
+ .max_unroll_iterations = 32,
};
const void *
@@ -115,7 +120,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
NIR_PASS_V(s, nir_lower_io_to_scalar,
- nir_var_shader_in|nir_var_shader_out);
+ nir_var_shader_in|nir_var_shader_out, NULL, NULL);
do {
progress = false;
@@ -133,19 +138,21 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
+ NIR_PASS(progress, s, nir_lower_undef_to_zero);
NIR_PASS(progress, s, nir_opt_loop_unroll);
+ NIR_PASS(progress, s, nir_lower_undef_to_zero);
} while (progress);
NIR_PASS_V(s, nir_lower_int_to_float);
/* int_to_float pass generates ftrunc, so lower it */
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
- NIR_PASS_V(s, nir_lower_bool_to_float);
+ NIR_PASS_V(s, nir_lower_bool_to_float, true);
NIR_PASS_V(s, nir_copy_prop);
NIR_PASS_V(s, nir_opt_dce);
NIR_PASS_V(s, lima_nir_split_loads);
- NIR_PASS_V(s, nir_lower_locals_to_regs);
NIR_PASS_V(s, nir_convert_from_ssa, true);
+ NIR_PASS_V(s, nir_opt_dce);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
nir_sweep(s);
}
@@ -159,6 +166,10 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
nir_alu_instr *alu = nir_instr_as_alu(instr);
switch (alu->op) {
case nir_op_frcp:
+ /* nir_op_idiv is lowered to frcp by nir_lower_int_to_float, which
+ * runs later, so lower idiv here
+ */
+ case nir_op_idiv:
case nir_op_frsq:
case nir_op_flog2:
case nir_op_fexp2:
@@ -183,7 +194,7 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
return false;
}
- int num_components = nir_dest_num_components(alu->dest.dest);
+ int num_components = alu->def.num_components;
uint8_t swizzle = alu->src[0].swizzle[0];
@@ -195,7 +206,7 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
}
static bool
-lima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
+lima_vec_to_regs_filter_cb(const nir_instr *instr, unsigned writemask,
const void *data)
{
assert(writemask > 0);
@@ -214,8 +225,8 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
NIR_PASS_V(s, nir_lower_io,
nir_var_shader_in | nir_var_shader_out, type_size, 0);
- NIR_PASS_V(s, nir_lower_regs_to_ssa);
NIR_PASS_V(s, nir_lower_tex, tex_options);
+ NIR_PASS_V(s, lima_nir_lower_txp);
do {
progress = false;
@@ -241,7 +252,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
} while (progress);
NIR_PASS_V(s, nir_lower_int_to_float);
- NIR_PASS_V(s, nir_lower_bool_to_float);
+ NIR_PASS_V(s, nir_lower_bool_to_float, true);
/* Some ops must be lowered after being converted from int ops,
* so re-run nir_opt_algebraic after int lowering. */
@@ -253,23 +264,23 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
/* Must be run after optimization loop */
NIR_PASS_V(s, lima_nir_scale_trig);
- /* Lower modifiers */
- NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
NIR_PASS_V(s, nir_copy_prop);
NIR_PASS_V(s, nir_opt_dce);
- NIR_PASS_V(s, nir_lower_locals_to_regs);
NIR_PASS_V(s, nir_convert_from_ssa, true);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
- NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
- NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL);
+ NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false);
+ NIR_PASS_V(s, nir_lower_vec_to_regs, lima_vec_to_regs_filter_cb, NULL);
+
NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */
NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
NIR_PASS_V(s, lima_nir_duplicate_load_consts);
+ NIR_PASS_V(s, nir_legacy_trivialize, true);
+
nir_sweep(s);
}
@@ -283,8 +294,8 @@ lima_fs_compile_shader(struct lima_context *ctx,
nir_shader *nir = nir_shader_clone(fs, ufs->base.ir.nir);
struct nir_lower_tex_options tex_options = {
- .lower_txp = ~0u,
.swizzle_result = ~0u,
+ .lower_invalid_implicit_lod = true,
};
for (int i = 0; i < ARRAY_SIZE(key->tex); i++) {
@@ -297,7 +308,7 @@ lima_fs_compile_shader(struct lima_context *ctx,
if (lima_debug & LIMA_DEBUG_PP)
nir_print_shader(nir, stdout);
- if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) {
+ if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->base.debug)) {
ralloc_free(nir);
return false;
}
@@ -312,15 +323,25 @@ static bool
lima_fs_upload_shader(struct lima_context *ctx,
struct lima_fs_compiled_shader *fs)
{
+ static const uint32_t pp_clear_program[] = {
+ PP_CLEAR_PROGRAM
+ };
+ int shader_size = sizeof(pp_clear_program);
+ void *shader = (void *)pp_clear_program;
struct lima_screen *screen = lima_screen(ctx->base.screen);
- fs->bo = lima_bo_create(screen, fs->state.shader_size, 0);
+ if (fs->state.shader_size) {
+ shader_size = fs->state.shader_size;
+ shader = fs->shader;
+ }
+
+ fs->bo = lima_bo_create(screen, shader_size, 0);
if (!fs->bo) {
fprintf(stderr, "lima: create fs shader bo fail\n");
return false;
}
- memcpy(lima_bo_map(fs->bo), fs->shader, fs->state.shader_size);
+ memcpy(lima_bo_map(fs->bo), shader, shader_size);
return true;
}
@@ -473,7 +494,7 @@ lima_vs_compile_shader(struct lima_context *ctx,
if (lima_debug & LIMA_DEBUG_GP)
nir_print_shader(nir, stdout);
- if (!gpir_compile_nir(vs, nir, &ctx->debug)) {
+ if (!gpir_compile_nir(vs, nir, &ctx->base.debug)) {
ralloc_free(nir);
return false;
}
@@ -591,15 +612,19 @@ lima_update_fs_state(struct lima_context *ctx)
memcpy(key->nir_sha1, ctx->uncomp_fs->nir_sha1,
sizeof(ctx->uncomp_fs->nir_sha1));
+ uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
for (int i = 0; i < lima_tex->num_textures; i++) {
struct lima_sampler_view *sampler = lima_sampler_view(lima_tex->textures[i]);
+ if (!sampler) {
+ memcpy(key->tex[i].swizzle, identity, 4);
+ continue;
+ }
for (int j = 0; j < 4; j++)
key->tex[i].swizzle[j] = sampler->swizzle[j];
}
/* Fill rest with identity swizzle */
- uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
for (int i = lima_tex->num_textures; i < ARRAY_SIZE(key->tex); i++)
memcpy(key->tex[i].swizzle, identity, 4);
diff --git a/src/gallium/drivers/lima/lima_resource.c b/src/gallium/drivers/lima/lima_resource.c
index 946edc86df8..1e263e95e57 100644
--- a/src/gallium/drivers/lima/lima_resource.c
+++ b/src/gallium/drivers/lima/lima_resource.c
@@ -28,8 +28,10 @@
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_debug.h"
+#include "util/u_resource.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
+#include "util/u_transfer_helper.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/u_drm.h"
@@ -45,6 +47,7 @@
#include "lima_resource.h"
#include "lima_bo.h"
#include "lima_util.h"
+#include "lima_blit.h"
#include "pan_minmax_cache.h"
#include "pan_tiling.h"
@@ -57,7 +60,10 @@ lima_resource_create_scanout(struct pipe_screen *pscreen,
struct lima_screen *screen = lima_screen(pscreen);
struct renderonly_scanout *scanout;
struct winsys_handle handle;
- struct pipe_resource *pres;
+
+ struct lima_resource *res = CALLOC_STRUCT(lima_resource);
+ if (!res)
+ return NULL;
struct pipe_resource scanout_templat = *templat;
scanout_templat.width0 = width;
@@ -69,32 +75,44 @@ lima_resource_create_scanout(struct pipe_screen *pscreen,
- if (!scanout)
+ if (!scanout) {
+ /* res is allocated before the scanout is created, so it has to be
+ * released on this failure path as well.
+ */
+ FREE(res);
 return NULL;
+ }
+ res->base = *templat;
+ res->base.screen = pscreen;
+ pipe_reference_init(&res->base.reference, 1);
+ res->levels[0].offset = handle.offset;
+ res->levels[0].stride = handle.stride;
+
assert(handle.type == WINSYS_HANDLE_TYPE_FD);
- pres = pscreen->resource_from_handle(pscreen, templat, &handle,
- PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE);
+ /* An import failure is handled after close(handle.handle) below, where
+ * the scanout and res are released together; returning right here would
+ * skip both the close() and renderonly_scanout_destroy().
+ */
+ res->bo = lima_bo_import(screen, &handle);
+
+ res->modifier_constant = true;
close(handle.handle);
- if (!pres) {
+ if (!res->bo) {
renderonly_scanout_destroy(scanout, screen->ro);
+ FREE(res);
return NULL;
}
- struct lima_resource *res = lima_resource(pres);
res->scanout = scanout;
- return pres;
+ return &res->base;
}
static uint32_t
setup_miptree(struct lima_resource *res,
unsigned width0, unsigned height0,
- bool should_align_dimensions)
+ bool align_to_tile)
{
struct pipe_resource *pres = &res->base;
unsigned level;
unsigned width = width0;
unsigned height = height0;
unsigned depth = pres->depth0;
+ unsigned nr_samples = MAX2(pres->nr_samples, 1);
uint32_t size = 0;
for (level = 0; level <= pres->last_level; level++) {
@@ -103,7 +121,7 @@ setup_miptree(struct lima_resource *res,
unsigned aligned_width;
unsigned aligned_height;
- if (should_align_dimensions) {
+ if (align_to_tile) {
aligned_width = align(width, 16);
aligned_height = align(height, 16);
} else {
@@ -116,7 +134,6 @@ setup_miptree(struct lima_resource *res,
util_format_get_nblocksy(pres->format, aligned_height) *
pres->array_size * depth;
- res->levels[level].width = aligned_width;
res->levels[level].stride = stride;
res->levels[level].offset = size;
res->levels[level].layer_stride = util_format_get_stride(pres->format, align(width, 16)) * align(height, 16);
@@ -124,19 +141,18 @@ setup_miptree(struct lima_resource *res,
if (util_format_is_compressed(pres->format))
res->levels[level].layer_stride /= 4;
- /* The start address of each level except the last level
- * must be 64-aligned in order to be able to pass the
- * addresses to the hardware. */
- if (level != pres->last_level)
- size += align(actual_level_size, 64);
- else
- size += actual_level_size; /* Save some memory */
+ size += align(actual_level_size, 64);
width = u_minify(width, 1);
height = u_minify(height, 1);
depth = u_minify(depth, 1);
}
+ if (nr_samples > 1)
+ res->mrt_pitch = size;
+
+ size *= nr_samples;
+
return size;
}
@@ -144,7 +160,7 @@ static struct pipe_resource *
lima_resource_create_bo(struct pipe_screen *pscreen,
const struct pipe_resource *templat,
unsigned width, unsigned height,
- bool should_align_dimensions)
+ bool align_to_tile)
{
struct lima_screen *screen = lima_screen(pscreen);
struct lima_resource *res;
@@ -160,7 +176,7 @@ lima_resource_create_bo(struct pipe_screen *pscreen,
pres = &res->base;
- uint32_t size = setup_miptree(res, width, height, should_align_dimensions);
+ uint32_t size = setup_miptree(res, width, height, align_to_tile);
size = align(size, LIMA_PAGE_SIZE);
res->bo = lima_bo_create(screen, size, 0);
@@ -181,8 +197,8 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
struct lima_screen *screen = lima_screen(pscreen);
bool should_tile = lima_debug & LIMA_DEBUG_NO_TILING ? false : true;
unsigned width, height;
- bool should_align_dimensions;
bool has_user_modifiers = true;
+ bool align_to_tile = false;
if (count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID)
has_user_modifiers = false;
@@ -203,24 +219,25 @@ _lima_resource_create_with_modifiers(struct pipe_screen *pscreen,
modifiers, count))
should_tile = false;
- if (should_tile || (templat->bind & PIPE_BIND_RENDER_TARGET) ||
- (templat->bind & PIPE_BIND_DEPTH_STENCIL)) {
- should_align_dimensions = true;
- width = align(templat->width0, 16);
- height = align(templat->height0, 16);
- }
- else {
- should_align_dimensions = false;
- width = templat->width0;
- height = templat->height0;
+ width = templat->width0;
+ height = templat->height0;
+
+ /* Don't align index, vertex or constant buffers */
+ if (!(templat->bind & (PIPE_BIND_INDEX_BUFFER |
+ PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_CONSTANT_BUFFER))) {
+ if (templat->bind & PIPE_BIND_SHARED) {
+ width = align(width, 16);
+ height = align(height, 16);
+ }
+ align_to_tile = true;
}
struct pipe_resource *pres;
if (screen->ro && (templat->bind & PIPE_BIND_SCANOUT))
pres = lima_resource_create_scanout(pscreen, templat, width, height);
else
- pres = lima_resource_create_bo(pscreen, templat, width, height,
- should_align_dimensions);
+ pres = lima_resource_create_bo(pscreen, templat, width, height, align_to_tile);
if (pres) {
struct lima_resource *res = lima_resource(pres);
@@ -346,12 +363,11 @@ lima_resource_from_handle(struct pipe_screen *pscreen,
/* check alignment for the buffer */
if (res->tiled ||
(pres->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL))) {
- unsigned width, height, stride, size;
+ unsigned width, stride, size;
width = align(pres->width0, 16);
- height = align(pres->height0, 16);
stride = util_format_get_stride(pres->format, width);
- size = util_format_get_2d_size(pres->format, stride, height);
+ size = util_format_get_2d_size(pres->format, stride, pres->height0);
if (res->tiled && res->levels[0].stride != stride) {
fprintf(stderr, "tiled imported buffer has mismatching stride: %d (BO) != %d (expected)",
@@ -375,11 +391,7 @@ lima_resource_from_handle(struct pipe_screen *pscreen,
(res->bo->size - res->levels[0].offset), size);
goto err_out;
}
-
- res->levels[0].width = width;
}
- else
- res->levels[0].width = pres->width0;
if (screen->ro) {
/* Make sure that renderonly has a handle to our buffer in the
@@ -435,7 +447,8 @@ lima_resource_get_param(struct pipe_screen *pscreen,
enum pipe_resource_param param,
unsigned usage, uint64_t *value)
{
- struct lima_resource *res = lima_resource(pres);
+ struct lima_resource *res =
+ (struct lima_resource *)util_resource_at_index(pres, plane);
switch (param) {
case PIPE_RESOURCE_PARAM_STRIDE:
@@ -449,7 +462,9 @@ lima_resource_get_param(struct pipe_screen *pscreen,
*value = DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED;
else
*value = DRM_FORMAT_MOD_LINEAR;
-
+ return true;
+ case PIPE_RESOURCE_PARAM_NPLANES:
+ *value = util_resource_num(pres);
return true;
default:
return false;
@@ -543,18 +558,6 @@ lima_resource_set_damage_region(struct pipe_screen *pscreen,
damage->num_region = nrects;
}
-void
-lima_resource_screen_init(struct lima_screen *screen)
-{
- screen->base.resource_create = lima_resource_create;
- screen->base.resource_create_with_modifiers = lima_resource_create_with_modifiers;
- screen->base.resource_from_handle = lima_resource_from_handle;
- screen->base.resource_destroy = lima_resource_destroy;
- screen->base.resource_get_handle = lima_resource_get_handle;
- screen->base.resource_get_param = lima_resource_get_param;
- screen->base.set_damage_region = lima_resource_set_damage_region;
-}
-
static struct pipe_surface *
lima_surface_create(struct pipe_context *pctx,
struct pipe_resource *pres,
@@ -577,6 +580,7 @@ lima_surface_create(struct pipe_context *pctx,
psurf->format = surf_tmpl->format;
psurf->width = u_minify(pres->width0, level);
psurf->height = u_minify(pres->height0, level);
+ psurf->nr_samples = surf_tmpl->nr_samples;
psurf->u.tex.level = level;
psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
@@ -651,17 +655,16 @@ lima_transfer_map(struct pipe_context *pctx,
unsigned op = usage & PIPE_MAP_WRITE ?
LIMA_GEM_WAIT_WRITE : LIMA_GEM_WAIT_READ;
- lima_bo_wait(bo, op, PIPE_TIMEOUT_INFINITE);
+ lima_bo_wait(bo, op, OS_TIMEOUT_INFINITE);
}
if (!lima_bo_map(bo))
return NULL;
- trans = slab_alloc(&ctx->transfer_pool);
+ trans = slab_zalloc(&ctx->transfer_pool);
if (!trans)
return NULL;
- memset(trans, 0, sizeof(*trans));
ptrans = &trans->base;
pipe_resource_reference(&ptrans->resource, pres);
@@ -678,6 +681,10 @@ lima_transfer_map(struct pipe_context *pctx,
trans->staging = malloc(ptrans->stride * ptrans->box.height * ptrans->box.depth);
if (usage & PIPE_MAP_READ) {
+ unsigned line_stride = res->levels[level].stride;
+ unsigned row_height = util_format_is_compressed(pres->format) ? 4 : 16;
+ unsigned row_stride = line_stride * row_height;
+
unsigned i;
for (i = 0; i < ptrans->box.depth; i++)
panfrost_load_tiled_image(
@@ -686,7 +693,7 @@ lima_transfer_map(struct pipe_context *pctx,
ptrans->box.x, ptrans->box.y,
ptrans->box.width, ptrans->box.height,
ptrans->stride,
- res->levels[level].stride,
+ row_stride,
pres->format);
}
@@ -701,7 +708,7 @@ lima_transfer_map(struct pipe_context *pctx,
ptrans->layer_stride = res->levels[level].layer_stride;
if ((usage & PIPE_MAP_WRITE) && (usage & PIPE_MAP_DIRECTLY))
- panfrost_minmax_cache_invalidate(res->index_cache, ptrans);
+ panfrost_minmax_cache_invalidate(res->index_cache, ptrans->box.x, ptrans->box.width);
return bo->map + res->levels[level].offset +
box->z * res->levels[level].layer_stride +
@@ -711,14 +718,6 @@ lima_transfer_map(struct pipe_context *pctx,
}
}
-static void
-lima_transfer_flush_region(struct pipe_context *pctx,
- struct pipe_transfer *ptrans,
- const struct pipe_box *box)
-{
-
-}
-
static bool
lima_should_convert_linear(struct lima_resource *res,
struct pipe_transfer *ptrans)
@@ -752,9 +751,11 @@ lima_should_convert_linear(struct lima_resource *res,
}
static void
-lima_transfer_unmap_inner(struct lima_context *ctx,
- struct pipe_transfer *ptrans)
+lima_transfer_flush_region(struct pipe_context *pctx,
+ struct pipe_transfer *ptrans,
+ const struct pipe_box *box)
{
+ struct lima_context *ctx = lima_context(pctx);
struct lima_resource *res = lima_resource(ptrans->resource);
struct lima_transfer *trans = lima_transfer(ptrans);
struct lima_bo *bo = res->bo;
@@ -784,13 +785,17 @@ lima_transfer_unmap_inner(struct lima_context *ctx,
/* Update texture descriptor */
ctx->dirty |= LIMA_CONTEXT_DIRTY_TEXTURES;
} else {
+ unsigned line_stride = res->levels[ptrans->level].stride;
+ unsigned row_height = util_format_is_compressed(pres->format) ? 4 : 16;
+ unsigned row_stride = line_stride * row_height;
+
for (i = 0; i < trans->base.box.depth; i++)
panfrost_store_tiled_image(
bo->map + res->levels[trans->base.level].offset + (i + trans->base.box.z) * res->levels[trans->base.level].layer_stride,
trans->staging + i * ptrans->stride * ptrans->box.height,
ptrans->box.x, ptrans->box.y,
ptrans->box.width, ptrans->box.height,
- res->levels[ptrans->level].stride,
+ row_stride,
ptrans->stride,
pres->format);
}
@@ -806,10 +811,14 @@ lima_transfer_unmap(struct pipe_context *pctx,
struct lima_transfer *trans = lima_transfer(ptrans);
struct lima_resource *res = lima_resource(ptrans->resource);
- lima_transfer_unmap_inner(ctx, ptrans);
+ struct pipe_box box;
+ u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
+ lima_transfer_flush_region(pctx, ptrans, &box);
if (trans->staging)
free(trans->staging);
- panfrost_minmax_cache_invalidate(res->index_cache, ptrans);
+ if (ptrans->usage & PIPE_MAP_WRITE) {
+ panfrost_minmax_cache_invalidate(res->index_cache, ptrans->box.x, ptrans->box.width);
+ }
pipe_resource_reference(&ptrans->resource, NULL);
slab_free(&ctx->transfer_pool, trans);
@@ -829,8 +838,8 @@ lima_util_blitter_save_states(struct lima_context *ctx)
util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
util_blitter_save_vertex_elements(ctx->blitter,
ctx->vertex_elements);
- util_blitter_save_vertex_buffer_slot(ctx->blitter,
- ctx->vertex_buffers.vb);
+ util_blitter_save_vertex_buffers(ctx->blitter,
+ ctx->vertex_buffers.vb, ctx->vertex_buffers.count);
util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer.base);
@@ -848,7 +857,11 @@ lima_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
struct lima_context *ctx = lima_context(pctx);
struct pipe_blit_info info = *blit_info;
- if (util_try_blit_via_copy_region(pctx, &info)) {
+ if (lima_do_blit(pctx, blit_info)) {
+ return;
+ }
+
+ if (util_try_blit_via_copy_region(pctx, &info, false)) {
return; /* done */
}
@@ -883,7 +896,7 @@ lima_texture_subdata(struct pipe_context *pctx,
const struct pipe_box *box,
const void *data,
unsigned stride,
- unsigned layer_stride)
+ uintptr_t layer_stride)
{
struct lima_context *ctx = lima_context(pctx);
struct lima_resource *res = lima_resource(prsc);
@@ -909,11 +922,41 @@ lima_texture_subdata(struct pipe_context *pctx,
};
lima_flush_job_accessing_bo(ctx, res->bo, true);
- lima_bo_wait(res->bo, LIMA_GEM_WAIT_WRITE, PIPE_TIMEOUT_INFINITE);
+ lima_bo_wait(res->bo, LIMA_GEM_WAIT_WRITE, OS_TIMEOUT_INFINITE);
if (!lima_bo_map(res->bo))
return;
- lima_transfer_unmap_inner(ctx, &t.base);
+ struct pipe_box tbox;
+ u_box_2d(0, 0, t.base.box.width, t.base.box.height, &tbox);
+ lima_transfer_flush_region(pctx, &t.base, &tbox);
+}
+
+static const struct u_transfer_vtbl transfer_vtbl = { /* given to u_transfer_helper_create() */
+   .transfer_map = lima_transfer_map,
+   .transfer_unmap = lima_transfer_unmap,
+   .transfer_flush_region = lima_transfer_flush_region,
+   .resource_create = lima_resource_create,
+   .resource_destroy = lima_resource_destroy,
+};
+
+void
+lima_resource_screen_init(struct lima_screen *screen)
+{
+   struct pipe_screen *pscreen = &screen->base; /* all hooks are set on the base screen */
+   pscreen->resource_create = lima_resource_create;
+   pscreen->resource_create_with_modifiers = lima_resource_create_with_modifiers;
+   pscreen->resource_from_handle = lima_resource_from_handle;
+   pscreen->resource_destroy = lima_resource_destroy;
+   pscreen->resource_get_handle = lima_resource_get_handle;
+   pscreen->resource_get_param = lima_resource_get_param;
+   pscreen->set_damage_region = lima_resource_set_damage_region;
+   pscreen->transfer_helper = u_transfer_helper_create(&transfer_vtbl, U_TRANSFER_HELPER_MSAA_MAP);
+}
+
+void
+lima_resource_screen_destroy(struct lima_screen *screen)
+{
+ u_transfer_helper_destroy(screen->base.transfer_helper);
}
void
@@ -932,11 +975,11 @@ lima_resource_context_init(struct lima_context *ctx)
ctx->base.blit = lima_blit;
- ctx->base.buffer_map = lima_transfer_map;
- ctx->base.texture_map = lima_transfer_map;
- ctx->base.transfer_flush_region = lima_transfer_flush_region;
- ctx->base.buffer_unmap = lima_transfer_unmap;
- ctx->base.texture_unmap = lima_transfer_unmap;
+ ctx->base.buffer_map = u_transfer_helper_transfer_map;
+ ctx->base.texture_map = u_transfer_helper_transfer_map;
+ ctx->base.transfer_flush_region = u_transfer_helper_transfer_flush_region;
+ ctx->base.buffer_unmap = u_transfer_helper_transfer_unmap;
+ ctx->base.texture_unmap = u_transfer_helper_transfer_unmap;
ctx->base.flush_resource = lima_flush_resource;
}
diff --git a/src/gallium/drivers/lima/lima_resource.h b/src/gallium/drivers/lima/lima_resource.h
index 91443e540b5..300a606f55c 100644
--- a/src/gallium/drivers/lima/lima_resource.h
+++ b/src/gallium/drivers/lima/lima_resource.h
@@ -35,7 +35,6 @@ struct lima_screen;
struct panfrost_minmax_cache;
struct lima_resource_level {
- uint32_t width;
uint32_t stride;
uint32_t offset;
uint32_t layer_stride;
@@ -55,6 +54,7 @@ struct lima_resource {
struct renderonly_scanout *scanout;
struct lima_bo *bo;
struct panfrost_minmax_cache *index_cache;
+ uint32_t mrt_pitch;
bool tiled;
bool modifier_constant;
unsigned full_updates;
@@ -95,6 +95,9 @@ void
lima_resource_screen_init(struct lima_screen *screen);
void
+lima_resource_screen_destroy(struct lima_screen *screen);
+
+void
lima_resource_context_init(struct lima_context *ctx);
#endif
diff --git a/src/gallium/drivers/lima/lima_screen.c b/src/gallium/drivers/lima/lima_screen.c
index 9c52f30de7d..2e779693d0e 100644
--- a/src/gallium/drivers/lima/lima_screen.c
+++ b/src/gallium/drivers/lima/lima_screen.c
@@ -63,6 +63,7 @@ lima_screen_destroy(struct pipe_screen *pscreen)
lima_bo_cache_fini(screen);
lima_bo_table_fini(screen);
disk_cache_destroy(screen->disk_cache);
+ lima_resource_screen_destroy(screen);
ralloc_free(screen);
}
@@ -84,7 +85,7 @@ lima_screen_get_name(struct pipe_screen *pscreen)
static const char *
lima_screen_get_vendor(struct pipe_screen *pscreen)
{
- return "lima";
+ return "Mesa";
}
static const char *
@@ -101,27 +102,25 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_ACCELERATED:
case PIPE_CAP_UMA:
+ case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
- return 1;
-
- /* Unimplemented, but for exporting OpenGL 2.0 */
- case PIPE_CAP_OCCLUSION_QUERY:
- case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_SURFACE_SAMPLE_COUNT:
return 1;
/* not clear supported */
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
- case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:
- case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_FS_POINT_IS_SYSVAL:
+ case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
return 1;
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
@@ -145,7 +144,7 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PCI_FUNCTION:
return 0;
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_TEXTURE_TRANSFER_MODES:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
@@ -160,6 +159,16 @@ lima_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
return 1;
+ /* Mali4x0 PP doesn't have a swizzle for load_input, so use POT-aligned
+ * varyings to avoid unnecessary movs for vec3 and a precision downgrade
+ * when the vec3 holds coordinates for a sampler
+ */
+ case PIPE_CAP_PREFER_POT_ALIGNED_VARYINGS:
+ return 1;
+
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+
default:
return u_pipe_screen_get_param_defaults(pscreen, param);
}
@@ -169,10 +178,18 @@ static float
lima_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
switch (param) {
+ case PIPE_CAPF_MIN_LINE_WIDTH:
+ case PIPE_CAPF_MIN_LINE_WIDTH_AA:
+ case PIPE_CAPF_MIN_POINT_SIZE:
+ case PIPE_CAPF_MIN_POINT_SIZE_AA:
+ return 1;
+ case PIPE_CAPF_POINT_SIZE_GRANULARITY:
+ case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
+ return 0.1;
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
- case PIPE_CAPF_MAX_POINT_WIDTH:
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ case PIPE_CAPF_MAX_POINT_SIZE:
+ case PIPE_CAPF_MAX_POINT_SIZE_AA:
return 100.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
@@ -206,21 +223,15 @@ get_vertex_shader_param(struct lima_screen *screen,
/* Mali-400 GP provides space for 304 vec4 uniforms, globals and
* temporary variables. */
- case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
return 304 * 4 * sizeof(float);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return 1;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_NIR;
-
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256; /* need investigate */
- case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- return 32;
-
default:
return 0;
}
@@ -247,7 +258,7 @@ get_fragment_shader_param(struct lima_screen *screen,
* However, indirect access to an uniform only supports indices up
* to 8192 (a 2048 vec4 array). To prevent indices bigger than that,
* limit max const buffer size to 8192 for now. */
- case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
return 2048 * 4 * sizeof(float);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
@@ -257,9 +268,6 @@ get_fragment_shader_param(struct lima_screen *screen,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* need investigate */
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_NIR;
-
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256; /* need investigate */
@@ -271,9 +279,6 @@ get_fragment_shader_param(struct lima_screen *screen,
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return 0;
- case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
- return 32;
-
default:
return 0;
}
@@ -309,6 +314,7 @@ lima_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
break;
@@ -319,7 +325,7 @@ lima_screen_is_format_supported(struct pipe_screen *pscreen,
if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
return false;
- /* be able to support 16, now limit to 4 */
+ /* Utgard supports 16x, but for now limit it to 4x */
if (sample_count > 1 && sample_count != 4)
return false;
@@ -583,7 +589,9 @@ static const struct debug_named_value lima_debug_options[] = {
"Precompile shaders for shader-db" },
{ "diskcache", LIMA_DEBUG_DISK_CACHE,
"print debug info for shader disk cache" },
- { NULL }
+ { "noblit", LIMA_DEBUG_NO_BLIT,
+ "use generic u_blitter instead of lima-specific" },
+ DEBUG_NAMED_VALUE_END
};
DEBUG_GET_ONCE_FLAGS_OPTION(lima_debug, "LIMA_DEBUG", lima_debug_options, 0)
@@ -633,8 +641,16 @@ lima_get_disk_shader_cache (struct pipe_screen *pscreen)
return screen->disk_cache;
}
+static int
+lima_screen_get_fd(struct pipe_screen *pscreen)
+{
+   /* Return the fd field of the lima_screen behind this pipe_screen. */
+   return lima_screen(pscreen)->fd;
+}
+
struct pipe_screen *
-lima_screen_create(int fd, struct renderonly *ro)
+lima_screen_create(int fd, const struct pipe_screen_config *config,
+ struct renderonly *ro)
{
uint64_t system_memory;
struct lima_screen *screen;
@@ -676,17 +692,15 @@ lima_screen_create(int fd, struct renderonly *ro)
screen->pp_buffer->cacheable = false;
/* fs program for clear buffer?
- * const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop
*/
static const uint32_t pp_clear_program[] = {
- 0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000,
- 0x000005f5, 0x00000000, 0x00000000, 0x00000000,
+ PP_CLEAR_PROGRAM
};
memcpy(lima_bo_map(screen->pp_buffer) + pp_clear_program_offset,
pp_clear_program, sizeof(pp_clear_program));
/* copy texture to framebuffer, used to reload gpu tile buffer
- * load.v $1 0.xy, texld_2d 0, mov.v0 $0 ^tex_sampler, sync, stop
+ * load.v $1 0.xy, texld 0, mov.v0 $0 ^tex_sampler, sync, stop
*/
static const uint32_t pp_reload_program[] = {
0x000005e6, 0xf1003c20, 0x00000000, 0x39001000,
@@ -717,6 +731,7 @@ lima_screen_create(int fd, struct renderonly *ro)
pp_frame_rsw[13] = 0x00000100;
screen->base.destroy = lima_screen_destroy;
+ screen->base.get_screen_fd = lima_screen_get_fd;
screen->base.get_name = lima_screen_get_name;
screen->base.get_vendor = lima_screen_get_vendor;
screen->base.get_device_vendor = lima_screen_get_device_vendor;
@@ -736,8 +751,6 @@ lima_screen_create(int fd, struct renderonly *ro)
slab_create_parent(&screen->transfer_pool, sizeof(struct lima_transfer), 16);
- screen->refcnt = 1;
-
return &screen->base;
err_out2:
diff --git a/src/gallium/drivers/lima/lima_screen.h b/src/gallium/drivers/lima/lima_screen.h
index bc08a490236..5f76edd551a 100644
--- a/src/gallium/drivers/lima/lima_screen.h
+++ b/src/gallium/drivers/lima/lima_screen.h
@@ -30,7 +30,7 @@
#include "util/slab.h"
#include "util/list.h"
#include "util/disk_cache.h"
-#include "os/os_thread.h"
+#include "util/u_thread.h"
#include "pipe/p_screen.h"
@@ -45,6 +45,7 @@
#define LIMA_DEBUG_SINGLE_JOB (1 << 8)
#define LIMA_DEBUG_PRECOMPILE (1 << 9)
#define LIMA_DEBUG_DISK_CACHE (1 << 10)
+#define LIMA_DEBUG_NO_BLIT (1 << 11)
extern uint32_t lima_debug;
extern int lima_ctx_num_plb;
@@ -59,13 +60,15 @@ struct ra_regs;
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
+/* PP clear program words (array initializer list): const0 1 0 0 -1.67773, mov.v0 $0 ^const0.xxxx, stop */
+#define PP_CLEAR_PROGRAM \
+   0x00020425, 0x0000000c, 0x01e007cf, 0xb0000000, \
+   0x000005f5, 0x00000000, 0x00000000, 0x00000000,
+
struct lima_screen {
struct pipe_screen base;
struct renderonly *ro;
- int refcnt;
- void *winsys_priv;
-
int fd;
int gpu_type;
int num_pp;
@@ -103,6 +106,7 @@ lima_screen(struct pipe_screen *pscreen)
}
struct pipe_screen *
-lima_screen_create(int fd, struct renderonly *ro);
+lima_screen_create(int fd, const struct pipe_screen_config *config,
+ struct renderonly *ro);
#endif
diff --git a/src/gallium/drivers/lima/lima_state.c b/src/gallium/drivers/lima/lima_state.c
index eafe772554e..198c2982534 100644
--- a/src/gallium/drivers/lima/lima_state.c
+++ b/src/gallium/drivers/lima/lima_state.c
@@ -29,6 +29,7 @@
#include "util/u_helpers.h"
#include "util/u_debug.h"
#include "util/u_framebuffer.h"
+#include "util/u_viewport.h"
#include "pipe/p_state.h"
@@ -185,18 +186,14 @@ lima_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso)
static void
lima_set_vertex_buffers(struct pipe_context *pctx,
- unsigned start_slot, unsigned count,
- unsigned unbind_num_trailing_slots,
- bool take_ownership,
+ unsigned count,
const struct pipe_vertex_buffer *vb)
{
struct lima_context *ctx = lima_context(pctx);
struct lima_context_vertex_buffer *so = &ctx->vertex_buffers;
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask,
- vb, start_slot, count,
- unbind_num_trailing_slots,
- take_ownership);
+ vb, count, true);
so->count = util_last_bit(so->enabled_mask);
ctx->dirty |= LIMA_CONTEXT_DIRTY_VERTEX_BUFF;
@@ -211,18 +208,22 @@ lima_set_viewport_states(struct pipe_context *pctx,
struct lima_context *ctx = lima_context(pctx);
/* reverse calculate the parameter of glViewport */
- ctx->viewport.left = viewport->translate[0] - fabsf(viewport->scale[0]);
- ctx->viewport.right = viewport->translate[0] + fabsf(viewport->scale[0]);
- ctx->viewport.bottom = viewport->translate[1] - fabsf(viewport->scale[1]);
- ctx->viewport.top = viewport->translate[1] + fabsf(viewport->scale[1]);
+ ctx->viewport.left = ctx->ext_viewport.left =
+ viewport->translate[0] - fabsf(viewport->scale[0]);
+ ctx->viewport.right = ctx->ext_viewport.right =
+ viewport->translate[0] + fabsf(viewport->scale[0]);
+ ctx->viewport.bottom = ctx->ext_viewport.bottom =
+ viewport->translate[1] - fabsf(viewport->scale[1]);
+ ctx->viewport.top = ctx->ext_viewport.top =
+ viewport->translate[1] + fabsf(viewport->scale[1]);
/* reverse calculate the parameter of glDepthRange */
float near, far;
- near = viewport->translate[2] - viewport->scale[2];
- far = viewport->translate[2] + viewport->scale[2];
+ bool halfz = ctx->rasterizer && ctx->rasterizer->base.clip_halfz;
+ util_viewport_zmin_zmax(viewport, halfz, &near, &far);
- ctx->viewport.near = MIN2(near, far);
- ctx->viewport.far = MAX2(near, far);
+ ctx->viewport.near = ctx->rasterizer && ctx->rasterizer->base.depth_clip_near ? near : 0.0f;
+ ctx->viewport.far = ctx->rasterizer && ctx->rasterizer->base.depth_clip_far ? far : 1.0f;
ctx->viewport.transform = *viewport;
ctx->dirty |= LIMA_CONTEXT_DIRTY_VIEWPORT;
@@ -415,6 +416,9 @@ static void
lima_set_sample_mask(struct pipe_context *pctx,
unsigned sample_mask)
{
+ struct lima_context *ctx = lima_context(pctx);
+ ctx->sample_mask = sample_mask & ((1 << LIMA_MAX_SAMPLES) - 1);
+ ctx->dirty |= LIMA_CONTEXT_DIRTY_SAMPLE_MASK;
}
void
@@ -464,8 +468,5 @@ lima_state_fini(struct lima_context *ctx)
struct lima_context_vertex_buffer *so = &ctx->vertex_buffers;
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, NULL,
- 0, 0, ARRAY_SIZE(so->vb), false);
-
- pipe_surface_reference(&ctx->framebuffer.base.cbufs[0], NULL);
- pipe_surface_reference(&ctx->framebuffer.base.zsbuf, NULL);
+ 0, false);
}
diff --git a/src/gallium/drivers/lima/lima_texture.c b/src/gallium/drivers/lima/lima_texture.c
index 7079865a3b5..90413eb22ec 100644
--- a/src/gallium/drivers/lima/lima_texture.c
+++ b/src/gallium/drivers/lima/lima_texture.c
@@ -23,6 +23,7 @@
*
*/
+#include "util/compiler.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
@@ -70,23 +71,26 @@ lima_texture_desc_set_va(lima_tex_desc *desc,
void
lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
struct pipe_resource *prsc,
- unsigned first_level, unsigned last_level, unsigned first_layer)
+ unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned mrt_idx)
{
- unsigned width, height, layout, i;
+ unsigned width, height, depth, layout, i;
struct lima_resource *lima_res = lima_resource(prsc);
width = prsc->width0;
height = prsc->height0;
+ depth = prsc->depth0;
if (first_level != 0) {
width = u_minify(width, first_level);
height = u_minify(height, first_level);
+ depth = u_minify(depth, first_level);
}
desc->format = lima_format_get_texel(prsc->format);
desc->swap_r_b = lima_format_get_texel_swap_rb(prsc->format);
desc->width = width;
desc->height = height;
- desc->unknown_3_1 = 1;
+ desc->depth = depth;
if (lima_res->tiled)
layout = 3;
@@ -99,7 +103,9 @@ lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
uint32_t base_va = lima_res->bo->va;
/* attach first level */
- uint32_t first_va = base_va + lima_res->levels[first_level].offset + first_layer * lima_res->levels[first_level].layer_stride;
+ uint32_t first_va = base_va + lima_res->levels[first_level].offset +
+ first_layer * lima_res->levels[first_level].layer_stride +
+ mrt_idx * lima_res->mrt_pitch;
desc->va_s.va_0 = first_va >> 6;
desc->va_s.layout = layout;
@@ -112,6 +118,37 @@ lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
}
}
+/* Translate a PIPE_TEX_WRAP_* mode into the matching LIMA_TEX_WRAP_* value.
+ * When using_nearest is set, CLAMP and MIRROR_CLAMP are replaced by their
+ * CLAMP_TO_EDGE counterparts; every other mode maps one-to-one.
+ * Unrecognized modes fall back to REPEAT.
+ */
+static unsigned
+pipe_wrap_to_lima(unsigned pipe_wrap, bool using_nearest)
+{
+   switch (pipe_wrap) {
+   case PIPE_TEX_WRAP_CLAMP:
+      return using_nearest ? LIMA_TEX_WRAP_CLAMP_TO_EDGE
+                           : LIMA_TEX_WRAP_CLAMP;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+      return using_nearest ? LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
+                           : LIMA_TEX_WRAP_MIRROR_CLAMP;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      return LIMA_TEX_WRAP_CLAMP_TO_EDGE;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      return LIMA_TEX_WRAP_CLAMP_TO_BORDER;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      return LIMA_TEX_WRAP_MIRROR_REPEAT;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+      return LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      return LIMA_TEX_WRAP_MIRROR_CLAMP_TO_BORDER;
+   case PIPE_TEX_WRAP_REPEAT:
+   default:
+      return LIMA_TEX_WRAP_REPEAT;
+   }
+}
+
static void
lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sampler,
struct lima_sampler_view *texture, void *pdesc,
@@ -127,19 +164,28 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample
memset(desc, 0, desc_size);
+ if (!texture)
+ return;
+
switch (texture->base.target) {
+ case PIPE_TEXTURE_1D:
+ desc->sampler_dim = LIMA_SAMPLER_DIM_1D;
+ break;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
- desc->texture_type = LIMA_TEXTURE_TYPE_2D;
+ desc->sampler_dim = LIMA_SAMPLER_DIM_2D;
break;
case PIPE_TEXTURE_CUBE:
- desc->texture_type = LIMA_TEXTURE_TYPE_CUBE;
+ desc->cube_map = 1;
+ FALLTHROUGH;
+ case PIPE_TEXTURE_3D:
+ desc->sampler_dim = LIMA_SAMPLER_DIM_3D;
break;
default:
break;
}
- if (!sampler->base.normalized_coords)
+ if (sampler->base.unnormalized_coords)
desc->unnorm_coords = 1;
first_level = texture->base.u.tex.first_level;
@@ -190,39 +236,19 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample
break;
}
- /* Only clamp, clamp to edge, repeat and mirror repeat are supported */
- switch (sampler->base.wrap_s) {
- case PIPE_TEX_WRAP_CLAMP:
- desc->wrap_s_clamp = 1;
- break;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- desc->wrap_s_clamp_to_edge = 1;
- break;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- desc->wrap_s_mirror_repeat = 1;
- break;
- case PIPE_TEX_WRAP_REPEAT:
- default:
- break;
- }
+ /* Panfrost mentions that GL_CLAMP is broken for NEAREST filter on Midgard;
+ * it looks like it is also broken on Utgard, since it fails in piglit
+ */
+ bool using_nearest = sampler->base.min_img_filter == PIPE_TEX_FILTER_NEAREST;
- /* Only clamp, clamp to edge, repeat and mirror repeat are supported */
- switch (sampler->base.wrap_t) {
- case PIPE_TEX_WRAP_CLAMP:
- desc->wrap_t_clamp = 1;
- break;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- desc->wrap_t_clamp_to_edge = 1;
- break;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- desc->wrap_t_mirror_repeat = 1;
- break;
- case PIPE_TEX_WRAP_REPEAT:
- default:
- break;
- }
+ desc->wrap_s = pipe_wrap_to_lima(sampler->base.wrap_s, using_nearest);
+ desc->wrap_t = pipe_wrap_to_lima(sampler->base.wrap_t, using_nearest);
+ desc->wrap_r = pipe_wrap_to_lima(sampler->base.wrap_r, using_nearest);
+
+ desc->border_red = float_to_ushort(sampler->base.border_color.f[0]);
+ desc->border_green = float_to_ushort(sampler->base.border_color.f[1]);
+ desc->border_blue = float_to_ushort(sampler->base.border_color.f[2]);
+ desc->border_alpha = float_to_ushort(sampler->base.border_color.f[3]);
if (desc->min_img_filter_nearest && desc->mag_img_filter_nearest &&
desc->min_mipfilter_2 == 0 &&
@@ -232,7 +258,7 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample
desc->lod_bias += lod_bias_delta;
lima_texture_desc_set_res(ctx, desc, texture->base.texture,
- first_level, last_level, first_layer);
+ first_level, last_level, first_layer, 0);
}
static unsigned
@@ -240,6 +266,10 @@ lima_calc_tex_desc_size(struct lima_sampler_view *texture)
{
unsigned size = offsetof(lima_tex_desc, va);
unsigned va_bit_size;
+
+ if (!texture)
+ return lima_min_tex_desc_size;
+
unsigned first_level = texture->base.u.tex.first_level;
unsigned last_level = texture->base.u.tex.last_level;
@@ -268,6 +298,8 @@ lima_update_textures(struct lima_context *ctx)
/* we always need to add texture bo to job */
for (int i = 0; i < lima_tex->num_samplers; i++) {
struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]);
+ if (!texture)
+ continue;
struct lima_resource *rsc = lima_resource(texture->base.texture);
lima_flush_previous_job_writing_resource(ctx, texture->base.texture);
lima_job_add_bo(job, LIMA_PIPE_PP, rsc->bo, LIMA_SUBMIT_BO_READ);
diff --git a/src/gallium/drivers/lima/lima_texture.h b/src/gallium/drivers/lima/lima_texture.h
index 08a961ba4ae..18cc9c7050a 100644
--- a/src/gallium/drivers/lima/lima_texture.h
+++ b/src/gallium/drivers/lima/lima_texture.h
@@ -27,8 +27,18 @@
#define lima_min_tex_desc_size 64
-#define LIMA_TEXTURE_TYPE_2D 2
-#define LIMA_TEXTURE_TYPE_CUBE 5
+#define LIMA_SAMPLER_DIM_1D 0
+#define LIMA_SAMPLER_DIM_2D 1
+#define LIMA_SAMPLER_DIM_3D 2
+
+#define LIMA_TEX_WRAP_REPEAT 0
+#define LIMA_TEX_WRAP_CLAMP_TO_EDGE 1
+#define LIMA_TEX_WRAP_CLAMP 2
+#define LIMA_TEX_WRAP_CLAMP_TO_BORDER 3
+#define LIMA_TEX_WRAP_MIRROR_REPEAT 4
+#define LIMA_TEX_WRAP_MIRROR_CLAMP_TO_EDGE 5
+#define LIMA_TEX_WRAP_MIRROR_CLAMP 6
+#define LIMA_TEX_WRAP_MIRROR_CLAMP_TO_BORDER 7
typedef struct __attribute__((__packed__)) {
/* Word 0 */
@@ -43,7 +53,8 @@ typedef struct __attribute__((__packed__)) {
uint32_t unknown_1_1: 7;
uint32_t unnorm_coords: 1;
uint32_t unknown_1_2: 1;
- uint32_t texture_type: 3;
+ uint32_t cube_map: 1;
+ uint32_t sampler_dim: 2;
uint32_t min_lod: 8; /* Fixed point, 4.4, unsigned */
uint32_t max_lod: 8; /* Fixed point, 4.4, unsigned */
uint32_t lod_bias: 9; /* Fixed point, signed, 1.4.4 */
@@ -52,23 +63,20 @@ typedef struct __attribute__((__packed__)) {
uint32_t min_mipfilter_2: 2; /* 0x3 for linear, 0x0 for nearest */
uint32_t min_img_filter_nearest: 1;
uint32_t mag_img_filter_nearest: 1;
- uint32_t wrap_s_clamp_to_edge: 1;
- uint32_t wrap_s_clamp: 1;
- uint32_t wrap_s_mirror_repeat: 1;
- uint32_t wrap_t_clamp_to_edge: 1;
- uint32_t wrap_t_clamp: 1;
- uint32_t wrap_t_mirror_repeat: 1;
- uint32_t unknown_2_2: 3;
+ uint32_t wrap_s: 3;
+ uint32_t wrap_t: 3;
+ uint32_t wrap_r: 3;
uint32_t width: 13;
uint32_t height: 13;
- uint32_t unknown_3_1: 1;
- uint32_t unknown_3_2: 15;
+ uint32_t depth: 13;
- /* Word 4 */
- uint32_t unknown_4;
+ uint32_t border_red: 16;
+ uint32_t border_green: 16;
+ uint32_t border_blue: 16;
+ uint32_t border_alpha: 16;
- /* Word 5 */
- uint32_t unknown_5;
+ /* Word 5 (last 3 bits) */
+ uint32_t unknown_5_1: 3;
/* Word 6-15 */
/* layout is in va[0] bit 13-14 */
@@ -93,7 +101,7 @@ typedef struct __attribute__((__packed__)) {
void lima_texture_desc_set_res(struct lima_context *ctx, lima_tex_desc *desc,
struct pipe_resource *prsc,
unsigned first_level, unsigned last_level,
- unsigned first_layer);
+ unsigned first_layer, unsigned mrt_idx);
void lima_update_textures(struct lima_context *ctx);
diff --git a/src/gallium/drivers/lima/lima_util.c b/src/gallium/drivers/lima/lima_util.c
index dca9307c991..1587ac18f5c 100644
--- a/src/gallium/drivers/lima/lima_util.c
+++ b/src/gallium/drivers/lima/lima_util.c
@@ -29,6 +29,8 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/box.h"
+#include "pipe/p_state.h"
#include "lima_util.h"
#include "lima_parser.h"
@@ -39,24 +41,6 @@ struct lima_dump {
int id;
};
-bool lima_get_absolute_timeout(uint64_t *timeout)
-{
- struct timespec current;
- uint64_t current_ns;
-
- if (*timeout == PIPE_TIMEOUT_INFINITE)
- return true;
-
- if (clock_gettime(CLOCK_MONOTONIC, &current))
- return false;
-
- current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
- current_ns += current.tv_nsec;
- *timeout += current_ns;
-
- return true;
-}
-
static void
lima_dump_blob(FILE *fp, void *data, int size, bool is_float)
{
@@ -79,6 +63,13 @@ lima_dump_blob(FILE *fp, void *data, int size, bool is_float)
}
void
+lima_dump_shader(struct lima_dump *dump, void *data, int size, bool is_frag)
+{
+ if (dump)
+ lima_parse_shader(dump->fp, (uint32_t *)data, size, is_frag);
+}
+
+void
lima_dump_vs_command_stream_print(struct lima_dump *dump, void *data,
int size, uint32_t start)
{
@@ -175,3 +166,14 @@ _lima_dump_command_stream_print(struct lima_dump *dump, void *data,
lima_dump_blob(dump->fp, data, size, is_float);
}
+
+void
+lima_damage_rect_union(struct pipe_scissor_state *rect,
+                       unsigned minx, unsigned maxx, unsigned miny, unsigned maxy)
+{
+   /* Grow rect in place so that it also encloses the given extents. */
+   rect->minx = MIN2(rect->minx, minx);
+   rect->maxx = MAX2(rect->maxx, maxx);
+   rect->miny = MIN2(rect->miny, miny);
+   rect->maxy = MAX2(rect->maxy, maxy);
+}
diff --git a/src/gallium/drivers/lima/lima_util.h b/src/gallium/drivers/lima/lima_util.h
index 3749523f3a1..56b441ecaa9 100644
--- a/src/gallium/drivers/lima/lima_util.h
+++ b/src/gallium/drivers/lima/lima_util.h
@@ -31,12 +31,11 @@
struct lima_dump;
-bool lima_get_absolute_timeout(uint64_t *timeout);
-
struct lima_dump *lima_dump_create(void);
struct lima_dump *lima_dump_next(struct lima_dump *dump);
void lima_dump_free(struct lima_dump *dump);
+void lima_dump_shader(struct lima_dump *dump, void *data, int size, bool is_frag);
void lima_dump_vs_command_stream_print(struct lima_dump *dump, void *data,
int size, uint32_t start);
void lima_dump_plbu_command_stream_print(struct lima_dump *dump, void *data,
@@ -54,4 +53,9 @@ void _lima_dump_command_stream_print(struct lima_dump *dump, void *data,
_lima_dump_command_stream_print(dump, __VA_ARGS__); \
} while (0)
+struct pipe_scissor_state;
+
+void lima_damage_rect_union(struct pipe_scissor_state *rect,
+ unsigned minx, unsigned maxx,
+ unsigned miny, unsigned maxy);
#endif
diff --git a/src/gallium/drivers/lima/meson.build b/src/gallium/drivers/lima/meson.build
index 5bc6fbbf869..95a5094cc39 100644
--- a/src/gallium/drivers/lima/meson.build
+++ b/src/gallium/drivers/lima/meson.build
@@ -51,6 +51,7 @@ files_lima = files(
'ir/lima_nir_lower_uniform_to_scalar.c',
'ir/lima_nir_split_load_input.c',
'ir/lima_nir_split_loads.c',
+ 'ir/lima_nir_lower_txp.c',
'ir/lima_ir.h',
@@ -81,6 +82,8 @@ files_lima = files(
'lima_format.h',
'lima_format.c',
'lima_gpu.h',
+ 'lima_blit.c',
+ 'lima_blit.h',
)
lima_nir_algebraic_c = custom_target(
@@ -88,11 +91,10 @@ lima_nir_algebraic_c = custom_target(
input : 'ir/lima_nir_algebraic.py',
output : 'lima_nir_algebraic.c',
command : [
- prog_python, '@INPUT@',
- '-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
+ prog_python, '@INPUT@', '-p', dir_compiler_nir,
],
capture : true,
- depend_files : nir_algebraic_py,
+ depend_files : nir_algebraic_depends,
)
liblima = static_library(
@@ -119,7 +121,7 @@ lima_compiler = executable(
'standalone/glsl.cpp'
),
include_directories : [
- inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, inc_compiler,
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi,
],
dependencies : [
idep_nir,
@@ -141,7 +143,7 @@ lima_disasm = executable(
'standalone/lima_disasm.c',
),
include_directories : [
- inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi, inc_compiler,
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, inc_mesa, inc_mapi,
],
dependencies : [
idep_mesautil,
diff --git a/src/gallium/drivers/lima/standalone/glsl.cpp b/src/gallium/drivers/lima/standalone/glsl.cpp
index 3cef68277ce..7b929e9b074 100644
--- a/src/gallium/drivers/lima/standalone/glsl.cpp
+++ b/src/gallium/drivers/lima/standalone/glsl.cpp
@@ -37,5 +37,5 @@ lima_do_glsl_optimizations(struct exec_list *ir)
int
st_glsl_type_size(const glsl_type *type, bool bindless)
{
- return type->count_attribute_slots(false);
+ return glsl_count_attribute_slots(type, false);
}
diff --git a/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c b/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c
index f3e1fa68397..3e5971fe4ce 100644
--- a/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c
+++ b/src/gallium/drivers/lima/standalone/lima_compiler_cmdline.c
@@ -28,10 +28,10 @@
#include "main/mtypes.h"
+#include "compiler/glsl_types.h"
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
-#include "compiler/nir_types.h"
#include "lima_context.h"
#include "lima_program.h"
@@ -112,7 +112,9 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage)
lima_do_glsl_optimizations(prog->_LinkedShaders[stage]->ir);
- nir_shader *nir = glsl_to_nir(&local_ctx, prog, stage, nir_options);
+ nir_shader *nir = glsl_to_nir(&local_ctx.Const, prog, stage, nir_options);
+
+ gl_nir_inline_functions(nir);
/* required NIR passes: */
if (nir_options->lower_all_io_to_temps ||
@@ -135,7 +137,7 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage)
NIR_PASS_V(nir, nir_lower_var_copies);
nir_print_shader(nir, stdout);
NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true);
- NIR_PASS_V(nir, nir_lower_atomics_to_ssbo);
+ NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, 0);
nir_print_shader(nir, stdout);
switch (stage) {
@@ -214,6 +216,7 @@ main(int argc, char **argv)
struct nir_lower_tex_options tex_options = {
.lower_txp = ~0u,
+ .lower_invalid_implicit_lod = true,
};
nir_shader *nir = load_glsl(1, filename, stage);
diff --git a/src/gallium/drivers/lima/standalone/lima_disasm.c b/src/gallium/drivers/lima/standalone/lima_disasm.c
index ee4460d5fc2..9c8278cddd9 100644
--- a/src/gallium/drivers/lima/standalone/lima_disasm.c
+++ b/src/gallium/drivers/lima/standalone/lima_disasm.c
@@ -166,7 +166,7 @@ main(int argc, char **argv)
}
char *filename = NULL;
- filename = argv[n];
+ filename = argv[argc - 1];
uint32_t size = 0;
uint32_t *prog = extract_shader_binary(filename, &size, &is_frag);
@@ -183,13 +183,13 @@ main(int argc, char **argv)
do {
ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *)bin;
printf("@%6d: ", offset);
- ppir_disassemble_instr(bin, offset);
+ ppir_disassemble_instr(bin, offset, stdout);
bin += ctrl->count;
offset += ctrl->count;
size -= ctrl->count;
} while (size);
} else {
- gpir_disassemble_program((gpir_codegen_instr *)prog, size / (sizeof(gpir_codegen_instr)));
+ gpir_disassemble_program((gpir_codegen_instr *)prog, size / (sizeof(gpir_codegen_instr)), stdout);
}
ralloc_free(prog);