summaryrefslogtreecommitdiff
path: root/src/mesa
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2009-08-18 12:20:36 -0700
committerIan Romanick <ian.d.romanick@intel.com>2009-08-18 12:20:36 -0700
commita512985fd81c1ed4ccc5e69aaa05015cf7ff844d (patch)
tree69e6e898deaeaed2b4dfb5851707c68261c464de /src/mesa
parent0b5af41c6fae2809f4567a7cecbd207e5e4f3ab5 (diff)
parentc80bc3abcd3939e5e2d45aea4b01ff22bfec244b (diff)
Merge branch 'master' into asm-shader-rework-1
Conflicts: src/mesa/shader/arbprogparse.c
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/common/meta.c1249
-rw-r--r--src/mesa/drivers/common/meta.h80
-rw-r--r--src/mesa/drivers/dri/common/.gitignore1
-rw-r--r--src/mesa/drivers/dri/common/dri_metaops.c7
-rw-r--r--src/mesa/drivers/dri/common/dri_util.c2
-rw-r--r--src/mesa/drivers/dri/common/extension_helper.h44
-rw-r--r--src/mesa/drivers/dri/fb/fb_egl.c4
-rw-r--r--src/mesa/drivers/dri/i915/i830_context.h8
-rw-r--r--src/mesa/drivers/dri/i915/i830_reg.h3
-rw-r--r--src/mesa/drivers/dri/i915/i830_state.c32
-rw-r--r--src/mesa/drivers/dri/i915/i830_vtbl.c27
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c4
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.h12
-rw-r--r--src/mesa/drivers/dri/i915/i915_reg.h4
-rw-r--r--src/mesa/drivers/dri/i915/i915_state.c186
-rw-r--r--src/mesa/drivers/dri/i915/i915_vtbl.c24
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_line.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_point.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_unfilled.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c48
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h19
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c903
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c55
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c141
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c21
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex_layout.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c73
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c48
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c112
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c156
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c33
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c44
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c10
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c31
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h9
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c71
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_copy.c167
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c21
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c12
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_subimage.c65
-rw-r--r--src/mesa/drivers/dri/r200/.gitignore15
-rw-r--r--src/mesa/drivers/dri/r200/Makefile48
-rw-r--r--src/mesa/drivers/dri/r200/r200_cmdbuf.c48
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.c12
-rw-r--r--src/mesa/drivers/dri/r200/r200_state_init.c10
l---------src/mesa/drivers/dri/r200/radeon_bo_legacy.c1
l---------src/mesa/drivers/dri/r200/radeon_bo_legacy.h1
l---------src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h1
l---------src/mesa/drivers/dri/r200/radeon_chipset.h1
l---------src/mesa/drivers/dri/r200/radeon_cmdbuf.h1
l---------src/mesa/drivers/dri/r200/radeon_common.c1
l---------src/mesa/drivers/dri/r200/radeon_common.h1
l---------src/mesa/drivers/dri/r200/radeon_common_context.c1
l---------src/mesa/drivers/dri/r200/radeon_common_context.h1
l---------src/mesa/drivers/dri/r200/radeon_cs_legacy.c1
l---------src/mesa/drivers/dri/r200/radeon_cs_legacy.h1
l---------src/mesa/drivers/dri/r200/radeon_cs_space_drm.c1
l---------src/mesa/drivers/dri/r200/radeon_dma.c1
l---------src/mesa/drivers/dri/r200/radeon_dma.h1
l---------src/mesa/drivers/dri/r200/radeon_fbo.c1
l---------src/mesa/drivers/dri/r200/radeon_lock.c1
l---------src/mesa/drivers/dri/r200/radeon_lock.h1
l---------src/mesa/drivers/dri/r200/radeon_mipmap_tree.c1
l---------src/mesa/drivers/dri/r200/radeon_mipmap_tree.h1
l---------src/mesa/drivers/dri/r200/radeon_screen.c1
l---------src/mesa/drivers/dri/r200/radeon_screen.h1
l---------src/mesa/drivers/dri/r200/radeon_span.c1
l---------src/mesa/drivers/dri/r200/radeon_span.h1
l---------src/mesa/drivers/dri/r200/radeon_texture.c1
l---------src/mesa/drivers/dri/r200/radeon_texture.h1
l---------src/mesa/drivers/dri/r200/server/radeon.h1
l---------src/mesa/drivers/dri/r200/server/radeon_dri.c1
l---------src/mesa/drivers/dri/r200/server/radeon_dri.h1
l---------src/mesa/drivers/dri/r200/server/radeon_egl.c1
l---------src/mesa/drivers/dri/r200/server/radeon_macros.h1
l---------src/mesa/drivers/dri/r200/server/radeon_reg.h1
-rw-r--r--src/mesa/drivers/dri/r300/.gitignore15
-rw-r--r--src/mesa/drivers/dri/r300/Makefile63
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile75
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.c95
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.h49
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c416
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.h49
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c (renamed from src/mesa/drivers/dri/r300/r300_fragprog_emit.c)210
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c (renamed from src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c)20
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h (renamed from src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h)0
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c149
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c615
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c177
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c (renamed from src/mesa/drivers/dri/r300/r500_fragprog.c)249
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h (renamed from src/mesa/drivers/dri/r300/r500_fragprog.h)11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c (renamed from src/mesa/drivers/dri/r300/r500_fragprog_emit.c)82
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.c170
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h206
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c262
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h108
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c (renamed from src/mesa/drivers/dri/r300/radeon_nqssadce.c)141
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h (renamed from src/mesa/drivers/dri/r300/radeon_nqssadce.h)12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c181
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h (renamed from src/mesa/drivers/dri/r300/radeon_program.h)44
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c (renamed from src/mesa/drivers/dri/r300/radeon_program_alu.c)460
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h (renamed from src/mesa/drivers/dri/r300/radeon_program_alu.h)21
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c (renamed from src/mesa/drivers/dri/r300/radeon_program_pair.c)314
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h (renamed from src/mesa/drivers/dri/r300/radeon_program_pair.h)37
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c81
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c41
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h196
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c436
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.c35
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog.c451
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog.h111
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c364
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_ioctl.c98
-rw-r--r--src/mesa/drivers/dri/r300/r300_queryobj.c250
-rw-r--r--src/mesa/drivers/dri/r300/r300_queryobj.h34
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h30
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c216
-rw-r--r--src/mesa/drivers/dri/r300/r300_shader.c11
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c247
-rw-r--r--src/mesa/drivers/dri/r300/r300_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c1576
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.h32
l---------src/mesa/drivers/dri/r300/radeon_bo_legacy.c1
l---------src/mesa/drivers/dri/r300/radeon_bo_legacy.h1
l---------src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h1
l---------src/mesa/drivers/dri/r300/radeon_buffer_objects.c1
l---------src/mesa/drivers/dri/r300/radeon_buffer_objects.h1
l---------src/mesa/drivers/dri/r300/radeon_chipset.h1
l---------src/mesa/drivers/dri/r300/radeon_cmdbuf.h1
l---------src/mesa/drivers/dri/r300/radeon_common.c1
l---------src/mesa/drivers/dri/r300/radeon_common.h1
l---------src/mesa/drivers/dri/r300/radeon_common_context.c1
l---------src/mesa/drivers/dri/r300/radeon_common_context.h1
l---------src/mesa/drivers/dri/r300/radeon_cs_legacy.c1
l---------src/mesa/drivers/dri/r300/radeon_cs_legacy.h1
l---------src/mesa/drivers/dri/r300/radeon_cs_space_drm.c1
l---------src/mesa/drivers/dri/r300/radeon_dma.c1
l---------src/mesa/drivers/dri/r300/radeon_dma.h1
l---------src/mesa/drivers/dri/r300/radeon_fbo.c1
l---------src/mesa/drivers/dri/r300/radeon_lock.c1
l---------src/mesa/drivers/dri/r300/radeon_lock.h1
l---------src/mesa/drivers/dri/r300/radeon_mipmap_tree.c1
l---------src/mesa/drivers/dri/r300/radeon_mipmap_tree.h1
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program.c128
l---------src/mesa/drivers/dri/r300/radeon_screen.c1
l---------src/mesa/drivers/dri/r300/radeon_screen.h1
l---------src/mesa/drivers/dri/r300/radeon_span.c1
l---------src/mesa/drivers/dri/r300/radeon_span.h1
l---------src/mesa/drivers/dri/r300/radeon_texture.c1
l---------src/mesa/drivers/dri/r300/radeon_texture.h1
l---------src/mesa/drivers/dri/r300/server/radeon.h1
l---------src/mesa/drivers/dri/r300/server/radeon_dri.c1
l---------src/mesa/drivers/dri/r300/server/radeon_dri.h1
l---------src/mesa/drivers/dri/r300/server/radeon_egl.c1
l---------src/mesa/drivers/dri/r300/server/radeon_macros.h1
l---------src/mesa/drivers/dri/r300/server/radeon_reg.h1
-rw-r--r--src/mesa/drivers/dri/r600/Makefile46
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c138
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h8
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c16
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.h18
-rw-r--r--src/mesa/drivers/dri/r600/r600_emit.c23
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c61
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c539
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c55
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c269
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.h37
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c117
-rw-r--r--src/mesa/drivers/dri/r600/r700_ioctl.c3
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.c2
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c299
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c920
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.h4
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c57
l---------src/mesa/drivers/dri/r600/radeon_bo_legacy.c1
l---------src/mesa/drivers/dri/r600/radeon_bo_legacy.h1
l---------src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h1
l---------src/mesa/drivers/dri/r600/radeon_chipset.h1
l---------src/mesa/drivers/dri/r600/radeon_cmdbuf.h1
l---------src/mesa/drivers/dri/r600/radeon_common.c1
l---------src/mesa/drivers/dri/r600/radeon_common.h1
l---------src/mesa/drivers/dri/r600/radeon_common_context.c1
l---------src/mesa/drivers/dri/r600/radeon_common_context.h1
-rw-r--r--src/mesa/drivers/dri/r600/radeon_context.h76
l---------src/mesa/drivers/dri/r600/radeon_cs_legacy.c1
l---------src/mesa/drivers/dri/r600/radeon_cs_legacy.h1
l---------src/mesa/drivers/dri/r600/radeon_cs_space_drm.c1
l---------src/mesa/drivers/dri/r600/radeon_dma.c1
l---------src/mesa/drivers/dri/r600/radeon_dma.h1
l---------src/mesa/drivers/dri/r600/radeon_fbo.c1
l---------src/mesa/drivers/dri/r600/radeon_lock.c1
l---------src/mesa/drivers/dri/r600/radeon_lock.h1
l---------src/mesa/drivers/dri/r600/radeon_mipmap_tree.c1
l---------src/mesa/drivers/dri/r600/radeon_mipmap_tree.h1
l---------src/mesa/drivers/dri/r600/radeon_screen.c1
l---------src/mesa/drivers/dri/r600/radeon_screen.h1
l---------src/mesa/drivers/dri/r600/radeon_span.c1
l---------src/mesa/drivers/dri/r600/radeon_span.h1
l---------src/mesa/drivers/dri/r600/radeon_texture.c1
l---------src/mesa/drivers/dri/r600/radeon_texture.h1
l---------src/mesa/drivers/dri/r600/server/radeon.h1
l---------src/mesa/drivers/dri/r600/server/radeon_dri.c1
l---------src/mesa/drivers/dri/r600/server/radeon_dri.h1
l---------src/mesa/drivers/dri/r600/server/radeon_egl.c1
l---------src/mesa/drivers/dri/r600/server/radeon_macros.h1
l---------src/mesa/drivers/dri/r600/server/radeon_reg.h1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bo_drm.h42
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bo_legacy.c116
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bo_legacy.h3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_buffer_objects.c217
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_buffer_objects.h52
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c35
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c156
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h15
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_dma.c16
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_dma.h8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c14
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_lock.c55
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_lock.h12
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c24
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c43
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.h1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c124
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_state_init.c4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c39
-rw-r--r--src/mesa/drivers/dri/swrast/swrast.c2
-rw-r--r--src/mesa/drivers/windows/gdi/mesa.def1
-rw-r--r--src/mesa/glapi/ARB_seamless_cube_map.xml12
-rw-r--r--src/mesa/glapi/EXT_framebuffer_object.xml38
-rw-r--r--src/mesa/glapi/EXT_texture_array.xml41
-rw-r--r--src/mesa/glapi/Makefile1
-rw-r--r--src/mesa/glapi/dispatch.h44
-rw-r--r--src/mesa/glapi/gl_API.xml61
-rw-r--r--src/mesa/glapi/gl_x86-64_asm.py2
-rw-r--r--src/mesa/glapi/gl_x86_asm.py2
-rw-r--r--src/mesa/glapi/glapioffsets.h24
-rw-r--r--src/mesa/glapi/glapitable.h18
-rw-r--r--src/mesa/glapi/glapitemp.h56
-rw-r--r--src/mesa/glapi/glprocs.h644
-rw-r--r--src/mesa/glapi/glthread.c51
-rw-r--r--src/mesa/glapi/glthread.h44
-rw-r--r--src/mesa/main/api_arrayelt.c3
-rw-r--r--src/mesa/main/api_exec.c1
-rw-r--r--src/mesa/main/api_validate.c21
-rw-r--r--src/mesa/main/attrib.c146
-rw-r--r--src/mesa/main/bufferobj.c64
-rw-r--r--src/mesa/main/bufferobj.h20
-rw-r--r--src/mesa/main/colortab.c8
-rw-r--r--src/mesa/main/compiler.h8
-rw-r--r--src/mesa/main/config.h9
-rw-r--r--src/mesa/main/context.c69
-rw-r--r--src/mesa/main/context.h6
-rw-r--r--src/mesa/main/convolve.c20
-rw-r--r--src/mesa/main/debug.c97
-rw-r--r--src/mesa/main/dlist.c304
-rw-r--r--src/mesa/main/drawpix.c142
-rw-r--r--src/mesa/main/enable.c10
-rw-r--r--src/mesa/main/enums.c5980
-rw-r--r--src/mesa/main/extensions.c3
-rw-r--r--src/mesa/main/fbobject.c20
-rw-r--r--src/mesa/main/fog.c2
-rw-r--r--src/mesa/main/framebuffer.c26
-rw-r--r--src/mesa/main/get.c12
-rw-r--r--src/mesa/main/get_gen.py4
-rw-r--r--src/mesa/main/getstring.c1
-rw-r--r--src/mesa/main/histogram.c8
-rw-r--r--src/mesa/main/image.c382
-rw-r--r--src/mesa/main/image.h24
-rw-r--r--src/mesa/main/imports.c85
-rw-r--r--src/mesa/main/imports.h3
-rw-r--r--src/mesa/main/light.c8
-rw-r--r--src/mesa/main/mtypes.h18
-rw-r--r--src/mesa/main/pixel.c24
-rw-r--r--src/mesa/main/polygon.c4
-rw-r--r--src/mesa/main/readpix.c55
-rw-r--r--src/mesa/main/shared.c2
-rw-r--r--src/mesa/main/state.c5
-rw-r--r--src/mesa/main/texenv.c69
-rw-r--r--src/mesa/main/texenvprogram.c15
-rw-r--r--src/mesa/main/texgetimage.c233
-rw-r--r--src/mesa/main/texgetimage.h9
-rw-r--r--src/mesa/main/teximage.c596
-rw-r--r--src/mesa/main/teximage.h16
-rw-r--r--src/mesa/main/texobj.c56
-rw-r--r--src/mesa/main/texobj.h8
-rw-r--r--src/mesa/main/texparam.c58
-rw-r--r--src/mesa/main/texstate.h18
-rw-r--r--src/mesa/main/texstore.c6
-rw-r--r--src/mesa/main/varray.c41
-rw-r--r--src/mesa/main/varray.h6
-rw-r--r--src/mesa/main/viewport.c4
-rw-r--r--src/mesa/shader/arbprogparse.c7
-rw-r--r--src/mesa/shader/prog_instruction.h9
-rw-r--r--src/mesa/shader/prog_print.c36
-rw-r--r--src/mesa/shader/prog_print.h11
-rw-r--r--src/mesa/shader/programopt.c1
-rw-r--r--src/mesa/shader/shader_api.c12
-rw-r--r--src/mesa/shader/slang/slang_builtin.c183
-rw-r--r--src/mesa/shader/slang/slang_builtin.h14
-rw-r--r--src/mesa/shader/slang/slang_codegen.c104
-rw-r--r--src/mesa/shader/slang/slang_link.c34
-rw-r--r--src/mesa/sources.mak3
-rw-r--r--src/mesa/sparc/glapi_sparc.S22
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c17
-rw-r--r--src/mesa/state_tracker/st_cb_blit.c97
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c24
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c74
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c53
-rw-r--r--src/mesa/state_tracker/st_cb_flush.c12
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.c2
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c26
-rw-r--r--src/mesa/state_tracker/st_draw.c5
-rw-r--r--src/mesa/state_tracker/st_draw.h2
-rw-r--r--src/mesa/state_tracker/st_draw_feedback.c4
-rw-r--r--src/mesa/state_tracker/st_extensions.c4
-rw-r--r--src/mesa/state_tracker/st_format.c4
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c37
-rw-r--r--src/mesa/state_tracker/st_program.c42
-rw-r--r--src/mesa/state_tracker/st_public.h4
-rw-r--r--src/mesa/state_tracker/st_texture.c108
-rw-r--r--src/mesa/state_tracker/st_texture.h5
-rw-r--r--src/mesa/swrast/s_blit.c189
-rw-r--r--src/mesa/swrast/s_fragprog.c5
-rw-r--r--src/mesa/swrast/s_points.c28
-rw-r--r--src/mesa/tnl/t_context.c2
-rw-r--r--src/mesa/tnl/t_draw.c18
-rw-r--r--src/mesa/tnl/t_vb_program.c3
-rw-r--r--src/mesa/tnl/tnl.h10
-rw-r--r--src/mesa/vbo/vbo.h6
-rw-r--r--src/mesa/vbo/vbo_exec_api.c21
-rw-r--r--src/mesa/vbo/vbo_exec_array.c172
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c13
-rw-r--r--src/mesa/vbo/vbo_rebase.c1
-rw-r--r--src/mesa/vbo/vbo_save_api.c2
-rw-r--r--src/mesa/vbo/vbo_save_draw.c1
-rw-r--r--src/mesa/vbo/vbo_split_copy.c12
-rw-r--r--src/mesa/vbo/vbo_split_inplace.c3
-rw-r--r--src/mesa/x86-64/glapi_x86-64.S260
-rw-r--r--src/mesa/x86/glapi_x86.S28
364 files changed, 16449 insertions, 11390 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
new file mode 100644
index 00000000000..e42beabc9ba
--- /dev/null
+++ b/src/mesa/drivers/common/meta.c
@@ -0,0 +1,1249 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.6
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Meta operations. Some GL operations can be expressed in terms of
+ * other GL operations. For example, glBlitFramebuffer() can be done
+ * with texture mapping and glClear() can be done with polygon rendering.
+ *
+ * \author Brian Paul
+ */
+
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/arrayobj.h"
+#include "main/blend.h"
+#include "main/bufferobj.h"
+#include "main/depth.h"
+#include "main/enable.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/matrix.h"
+#include "main/polygon.h"
+#include "main/scissor.h"
+#include "main/shaders.h"
+#include "main/stencil.h"
+#include "main/texobj.h"
+#include "main/texenv.h"
+#include "main/teximage.h"
+#include "main/texparam.h"
+#include "main/texstate.h"
+#include "main/varray.h"
+#include "main/viewport.h"
+#include "shader/program.h"
+#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
+
+
+/**
+ * State which we may save/restore across meta ops.
+ * XXX this may be incomplete...
+ */
+struct save_state
+{
+ GLbitfield SavedState; /**< bitmask of META_* flags */
+
+ /** META_ALPHA_TEST */
+ GLboolean AlphaEnabled;
+
+ /** META_BLEND */
+ GLboolean BlendEnabled;
+ GLboolean ColorLogicOpEnabled;
+
+ /** META_COLOR_MASK */
+ GLubyte ColorMask[4];
+
+ /** META_DEPTH_TEST */
+ struct gl_depthbuffer_attrib Depth;
+
+ /** META_FOG */
+ GLboolean Fog;
+
+ /** META_PIXELSTORE */
+ /* XXX / TO-DO */
+
+ /** META_RASTERIZATION */
+ GLenum FrontPolygonMode, BackPolygonMode;
+ GLboolean PolygonOffset;
+ GLboolean PolygonSmooth;
+ GLboolean PolygonStipple;
+ GLboolean PolygonCull;
+
+ /** META_SCISSOR */
+ struct gl_scissor_attrib Scissor;
+
+ /** META_SHADER */
+ GLboolean VertexProgramEnabled;
+ struct gl_vertex_program *VertexProgram;
+ GLboolean FragmentProgramEnabled;
+ struct gl_fragment_program *FragmentProgram;
+ GLuint Shader;
+
+ /** META_STENCIL_TEST */
+ struct gl_stencil_attrib Stencil;
+
+ /** META_TRANSFORM */
+ GLenum MatrixMode;
+ GLfloat ModelviewMatrix[16];
+ GLfloat ProjectionMatrix[16];
+ GLfloat TextureMatrix[16];
+ GLbitfield ClipPlanesEnabled;
+
+ /** META_TEXTURE */
+ GLuint ActiveUnit;
+ GLuint ClientActiveUnit;
+ /** for unit[0] only */
+ struct gl_texture_object *CurrentTexture[NUM_TEXTURE_TARGETS];
+ /** mask of TEXTURE_2D_BIT, etc */
+ GLbitfield TexEnabled[MAX_TEXTURE_UNITS];
+ GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
+ GLuint EnvMode; /* unit[0] only */
+
+ /** META_VERTEX */
+ struct gl_array_object *ArrayObj;
+ struct gl_buffer_object *ArrayBufferObj;
+
+ /** META_VIEWPORT */
+ GLint ViewportX, ViewportY, ViewportW, ViewportH;
+ GLclampd DepthNear, DepthFar;
+
+ /** Miscellaneous (always disabled) */
+ GLboolean Lighting;
+};
+
+
+/**
+ * State for glBlitFramebufer()
+ */
+struct blit_state
+{
+ GLuint TexObj;
+ GLsizei TexWidth, TexHeight;
+ GLenum TexType;
+ GLuint ArrayObj;
+ GLuint VBO;
+ GLfloat verts[4][4]; /** four verts of X,Y,S,T */
+};
+
+
+/**
+ * State for glClear()
+ */
+struct clear_state
+{
+ GLuint ArrayObj;
+ GLuint VBO;
+ GLfloat verts[4][7]; /** four verts of X,Y,Z,R,G,B,A */
+};
+
+
+/**
+ * State for glCopyPixels()
+ */
+struct copypix_state
+{
+ GLuint TexObj;
+ GLsizei TexWidth, TexHeight;
+ GLenum TexType;
+ GLuint ArrayObj;
+ GLuint VBO;
+ GLfloat verts[4][5]; /** four verts of X,Y,Z,S,T */
+};
+
+
+/**
+ * State for glDrawPixels()
+ */
+struct drawpix_state
+{
+ GLuint TexObj;
+ GLsizei TexWidth, TexHeight;
+ GLenum TexIntFormat;
+ GLuint ArrayObj;
+ GLuint VBO;
+ GLfloat verts[4][5]; /** four verts of X,Y,Z,S,T */
+};
+
+
+
+/**
+ * All per-context meta state.
+ */
+struct gl_meta_state
+{
+ struct save_state Save; /**< state saved during meta-ops */
+
+ struct blit_state Blit; /**< For _mesa_meta_blit_framebuffer() */
+ struct clear_state Clear; /**< For _mesa_meta_clear() */
+ struct copypix_state CopyPix; /**< For _mesa_meta_copy_pixels() */
+ struct drawpix_state DrawPix; /**< For _mesa_meta_draw_pixels() */
+
+ /* other possible meta-ops:
+ * glDrawPixels()
+ * glBitmap()
+ * glGenerateMipmap()
+ */
+};
+
+
+/**
+ * Initialize meta-ops for a context.
+ * To be called once during context creation.
+ */
+void
+_mesa_meta_init(GLcontext *ctx)
+{
+ ASSERT(!ctx->Meta);
+
+ ctx->Meta = CALLOC_STRUCT(gl_meta_state);
+}
+
+
+/**
+ * Free context meta-op state.
+ * To be called once during context destruction.
+ */
+void
+_mesa_meta_free(GLcontext *ctx)
+{
+ struct gl_meta_state *meta = ctx->Meta;
+
+ if (meta->Blit.TexObj) {
+ _mesa_DeleteTextures(1, &meta->Blit.TexObj);
+ _mesa_DeleteBuffersARB(1, & meta->Blit.VBO);
+ _mesa_DeleteVertexArraysAPPLE(1, &meta->Blit.ArrayObj);
+ }
+
+ if (meta->Clear.VBO) {
+ _mesa_DeleteBuffersARB(1, & meta->Clear.VBO);
+ _mesa_DeleteVertexArraysAPPLE(1, &meta->Clear.ArrayObj);
+ }
+
+ if (meta->CopyPix.TexObj) {
+ _mesa_DeleteTextures(1, &meta->CopyPix.TexObj);
+ _mesa_DeleteBuffersARB(1, & meta->CopyPix.VBO);
+ _mesa_DeleteVertexArraysAPPLE(1, &meta->CopyPix.ArrayObj);
+ }
+
+ if (meta->DrawPix.TexObj) {
+ _mesa_DeleteTextures(1, &meta->DrawPix.TexObj);
+ _mesa_DeleteBuffersARB(1, & meta->DrawPix.VBO);
+ _mesa_DeleteVertexArraysAPPLE(1, &meta->DrawPix.ArrayObj);
+ }
+
+ _mesa_free(ctx->Meta);
+ ctx->Meta = NULL;
+}
+
+
+/**
+ * Enter meta state. This is like a light-weight version of glPushAttrib
+ * but it also resets most GL state back to default values.
+ *
+ * \param state bitmask of META_* flags indicating which attribute groups
+ * to save and reset to their defaults
+ */
+static void
+_mesa_meta_begin(GLcontext *ctx, GLbitfield state)
+{
+ struct save_state *save = &ctx->Meta->Save;
+
+ save->SavedState = state;
+
+ if (state & META_ALPHA_TEST) {
+ save->AlphaEnabled = ctx->Color.AlphaEnabled;
+ if (ctx->Color.AlphaEnabled)
+ _mesa_Disable(GL_ALPHA_TEST);
+ }
+
+ if (state & META_BLEND) {
+ save->BlendEnabled = ctx->Color.BlendEnabled;
+ if (ctx->Color.BlendEnabled)
+ _mesa_Disable(GL_BLEND);
+ save->ColorLogicOpEnabled = ctx->Color.ColorLogicOpEnabled;
+ if (ctx->Color.ColorLogicOpEnabled)
+ _mesa_Disable(GL_COLOR_LOGIC_OP);
+ }
+
+ if (state & META_COLOR_MASK) {
+ COPY_4V(save->ColorMask, ctx->Color.ColorMask);
+ if (!ctx->Color.ColorMask[0] ||
+ !ctx->Color.ColorMask[1] ||
+ !ctx->Color.ColorMask[2] ||
+ !ctx->Color.ColorMask[3])
+ _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ }
+
+ if (state & META_DEPTH_TEST) {
+ save->Depth = ctx->Depth; /* struct copy */
+ if (ctx->Depth.Test)
+ _mesa_Disable(GL_DEPTH_TEST);
+ }
+
+ if (state & META_FOG) {
+ save->Fog = ctx->Fog.Enabled;
+ if (ctx->Fog.Enabled)
+ _mesa_set_enable(ctx, GL_FOG, GL_FALSE);
+ }
+
+ if (state & META_RASTERIZATION) {
+ save->FrontPolygonMode = ctx->Polygon.FrontMode;
+ save->BackPolygonMode = ctx->Polygon.BackMode;
+ save->PolygonOffset = ctx->Polygon.OffsetFill;
+ save->PolygonSmooth = ctx->Polygon.SmoothFlag;
+ save->PolygonStipple = ctx->Polygon.StippleFlag;
+ save->PolygonCull = ctx->Polygon.CullFlag;
+ _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, GL_FALSE);
+ _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, GL_FALSE);
+ _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, GL_FALSE);
+ _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
+ }
+
+ if (state & META_SCISSOR) {
+ save->Scissor = ctx->Scissor; /* struct copy */
+ }
+
+ if (state & META_SHADER) {
+ if (ctx->Extensions.ARB_vertex_program) {
+ save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
+ save->VertexProgram = ctx->VertexProgram.Current;
+ _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, GL_FALSE);
+ }
+
+ if (ctx->Extensions.ARB_fragment_program) {
+ save->FragmentProgramEnabled = ctx->FragmentProgram.Enabled;
+ save->FragmentProgram = ctx->FragmentProgram.Current;
+ _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_FALSE);
+ }
+
+ if (ctx->Extensions.ARB_shader_objects) {
+ save->Shader = ctx->Shader.CurrentProgram ?
+ ctx->Shader.CurrentProgram->Name : 0;
+ _mesa_UseProgramObjectARB(0);
+ }
+ }
+
+ if (state & META_STENCIL_TEST) {
+ save->Stencil = ctx->Stencil; /* struct copy */
+ if (ctx->Stencil.Enabled)
+ _mesa_Disable(GL_STENCIL_TEST);
+ /* NOTE: other stencil state not reset */
+ }
+
+ if (state & META_TEXTURE) {
+ GLuint u, tgt;
+
+ save->ActiveUnit = ctx->Texture.CurrentUnit;
+ save->ClientActiveUnit = ctx->Array.ActiveTexture;
+ save->EnvMode = ctx->Texture.Unit[0].EnvMode;
+
+ /* Disable all texture units */
+ for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+ save->TexEnabled[u] = ctx->Texture.Unit[u].Enabled;
+ save->TexGenEnabled[u] = ctx->Texture.Unit[u].TexGenEnabled;
+ if (ctx->Texture.Unit[u].Enabled ||
+ ctx->Texture.Unit[u].TexGenEnabled) {
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+ _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_FALSE);
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_FALSE);
+ }
+ }
+
+ /* save current texture objects for unit[0] only */
+ for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) {
+ _mesa_reference_texobj(&save->CurrentTexture[tgt],
+ ctx->Texture.Unit[0].CurrentTex[tgt]);
+ }
+
+ /* set defaults for unit[0] */
+ _mesa_ActiveTextureARB(GL_TEXTURE0);
+ _mesa_ClientActiveTextureARB(GL_TEXTURE0);
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ }
+
+ if (state & META_TRANSFORM) {
+ GLuint activeTexture = ctx->Texture.CurrentUnit;
+ _mesa_memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
+ 16 * sizeof(GLfloat));
+ _mesa_memcpy(save->ProjectionMatrix, ctx->ProjectionMatrixStack.Top->m,
+ 16 * sizeof(GLfloat));
+ _mesa_memcpy(save->TextureMatrix, ctx->TextureMatrixStack[0].Top->m,
+ 16 * sizeof(GLfloat));
+ save->MatrixMode = ctx->Transform.MatrixMode;
+ /* set 1:1 vertex:pixel coordinate transform */
+ _mesa_ActiveTextureARB(GL_TEXTURE0);
+ _mesa_MatrixMode(GL_TEXTURE);
+ _mesa_LoadIdentity();
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture);
+ _mesa_MatrixMode(GL_MODELVIEW);
+ _mesa_LoadIdentity();
+ _mesa_MatrixMode(GL_PROJECTION);
+ _mesa_LoadIdentity();
+ _mesa_Ortho(0.0F, ctx->DrawBuffer->Width,
+ 0.0F, ctx->DrawBuffer->Height,
+ -1.0F, 1.0F);
+ save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
+ if (ctx->Transform.ClipPlanesEnabled) {
+ GLuint i;
+ for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
+ _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_FALSE);
+ }
+ }
+ }
+
+ if (state & META_VERTEX) {
+ /* save vertex array object state */
+ _mesa_reference_array_object(ctx, &save->ArrayObj,
+ ctx->Array.ArrayObj);
+ _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj,
+ ctx->Array.ArrayBufferObj);
+ /* set some default state? */
+ }
+
+ if (state & META_VIEWPORT) {
+ save->ViewportX = ctx->Viewport.X;
+ save->ViewportY = ctx->Viewport.Y;
+ save->ViewportW = ctx->Viewport.Width;
+ save->ViewportH = ctx->Viewport.Height;
+ _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
+ save->DepthNear = ctx->Viewport.Near;
+ save->DepthFar = ctx->Viewport.Far;
+ _mesa_DepthRange(0.0, 1.0);
+ }
+
+ /* misc */
+ {
+ save->Lighting = ctx->Light.Enabled;
+ if (ctx->Light.Enabled)
+ _mesa_set_enable(ctx, GL_LIGHTING, GL_FALSE);
+ }
+}
+
+
+/**
+ * Leave meta state. This is like a light-weight version of glPopAttrib().
+ */
+static void
+_mesa_meta_end(GLcontext *ctx)
+{
+ struct save_state *save = &ctx->Meta->Save;
+ const GLbitfield state = save->SavedState;
+
+ if (state & META_ALPHA_TEST) {
+ if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
+ _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
+ }
+
+ if (state & META_BLEND) {
+ if (ctx->Color.BlendEnabled != save->BlendEnabled)
+ _mesa_set_enable(ctx, GL_BLEND, save->BlendEnabled);
+ if (ctx->Color.ColorLogicOpEnabled != save->ColorLogicOpEnabled)
+ _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
+ }
+
+ if (state & META_COLOR_MASK) {
+ if (!TEST_EQ_4V(ctx->Color.ColorMask, save->ColorMask))
+ _mesa_ColorMask(save->ColorMask[0], save->ColorMask[1],
+ save->ColorMask[2], save->ColorMask[3]);
+ }
+
+ if (state & META_DEPTH_TEST) {
+ if (ctx->Depth.Test != save->Depth.Test)
+ _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
+ _mesa_DepthFunc(save->Depth.Func);
+ _mesa_DepthMask(save->Depth.Mask);
+ }
+
+ if (state & META_FOG) {
+ _mesa_set_enable(ctx, GL_FOG, save->Fog);
+ }
+
+ if (state & META_RASTERIZATION) {
+ _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
+ _mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
+ _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
+ _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, save->PolygonOffset);
+ _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, save->PolygonSmooth);
+ _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
+ }
+
+ if (state & META_SCISSOR) {
+ _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
+ _mesa_Scissor(save->Scissor.X, save->Scissor.Y,
+ save->Scissor.Width, save->Scissor.Height);
+ }
+
+ if (state & META_SHADER) {
+ if (ctx->Extensions.ARB_vertex_program) {
+ _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
+ save->VertexProgramEnabled);
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
+ save->VertexProgram);
+ }
+
+ if (ctx->Extensions.ARB_fragment_program) {
+ _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB,
+ save->FragmentProgramEnabled);
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
+ save->FragmentProgram);
+ }
+
+ if (ctx->Extensions.ARB_shader_objects) {
+ _mesa_UseProgramObjectARB(save->Shader);
+ }
+ }
+
+ if (state & META_STENCIL_TEST) {
+ const struct gl_stencil_attrib *stencil = &save->Stencil;
+
+ _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
+ _mesa_ClearStencil(stencil->Clear);
+ if (ctx->Extensions.EXT_stencil_two_side) {
+ _mesa_set_enable(ctx, GL_STENCIL_TEST_TWO_SIDE_EXT,
+ stencil->TestTwoSide);
+ _mesa_ActiveStencilFaceEXT(stencil->ActiveFace
+ ? GL_BACK : GL_FRONT);
+ }
+ /* front state */
+ _mesa_StencilFuncSeparate(GL_FRONT,
+ stencil->Function[0],
+ stencil->Ref[0],
+ stencil->ValueMask[0]);
+ _mesa_StencilMaskSeparate(GL_FRONT, stencil->WriteMask[0]);
+ _mesa_StencilOpSeparate(GL_FRONT, stencil->FailFunc[0],
+ stencil->ZFailFunc[0],
+ stencil->ZPassFunc[0]);
+ /* back state */
+ _mesa_StencilFuncSeparate(GL_BACK,
+ stencil->Function[1],
+ stencil->Ref[1],
+ stencil->ValueMask[1]);
+ _mesa_StencilMaskSeparate(GL_BACK, stencil->WriteMask[1]);
+ _mesa_StencilOpSeparate(GL_BACK, stencil->FailFunc[1],
+ stencil->ZFailFunc[1],
+ stencil->ZPassFunc[1]);
+ }
+
+ if (state & META_TEXTURE) {
+ GLuint u, tgt;
+
+ ASSERT(ctx->Texture.CurrentUnit == 0);
+
+ /* restore texenv for unit[0] */
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, save->EnvMode);
+
+ /* restore texture objects for unit[0] only */
+ for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) {
+ _mesa_reference_texobj(&ctx->Texture.Unit[0].CurrentTex[tgt],
+ save->CurrentTexture[tgt]);
+ }
+
+ /* Re-enable textures, texgen */
+ for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+ if (save->TexEnabled[u]) {
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+
+ if (save->TexEnabled[u] & TEXTURE_1D_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_TRUE);
+ if (save->TexEnabled[u] & TEXTURE_2D_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_TRUE);
+ if (save->TexEnabled[u] & TEXTURE_3D_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_TRUE);
+ if (save->TexEnabled[u] & TEXTURE_CUBE_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_TRUE);
+ if (save->TexEnabled[u] & TEXTURE_RECT_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_TRUE);
+ }
+
+ if (save->TexGenEnabled[u]) {
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+
+ if (save->TexGenEnabled[u] & S_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_TRUE);
+ if (save->TexGenEnabled[u] & T_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_TRUE);
+ if (save->TexGenEnabled[u] & R_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_TRUE);
+ if (save->TexGenEnabled[u] & Q_BIT)
+ _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_TRUE);
+ }
+ }
+
+ /* restore current unit state */
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + save->ActiveUnit);
+ _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
+ }
+
+ if (state & META_TRANSFORM) {
+ GLuint activeTexture = ctx->Texture.CurrentUnit;
+ _mesa_ActiveTextureARB(GL_TEXTURE0);
+ _mesa_MatrixMode(GL_TEXTURE);
+ _mesa_LoadMatrixf(save->TextureMatrix);
+ _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture);
+
+ _mesa_MatrixMode(GL_MODELVIEW);
+ _mesa_LoadMatrixf(save->ModelviewMatrix);
+
+ _mesa_MatrixMode(GL_PROJECTION);
+ _mesa_LoadMatrixf(save->ProjectionMatrix);
+
+ _mesa_MatrixMode(save->MatrixMode);
+
+ save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
+ if (save->ClipPlanesEnabled) {
+ GLuint i;
+ for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
+ if (save->ClipPlanesEnabled & (1 << i)) {
+ _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_TRUE);
+ }
+ }
+ }
+ }
+
+ if (state & META_VERTEX) {
+ /* restore vertex buffer object */
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
+ _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
+
+ /* restore vertex array object */
+ _mesa_BindVertexArray(save->ArrayObj->Name);
+ _mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
+ }
+
+ if (state & META_VIEWPORT) {
+ _mesa_Viewport(save->ViewportX, save->ViewportY,
+ save->ViewportW, save->ViewportH);
+ _mesa_DepthRange(save->DepthNear, save->DepthFar);
+ }
+
+ /* misc */
+ if (save->Lighting) {
+ _mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE);
+ }
+ if (save->Fog) {
+ _mesa_set_enable(ctx, GL_FOG, GL_TRUE);
+ }
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.BlitFramebuffer() in terms
+ * of texture mapping and polygon rendering.
+ * Note: this function requires GL_ARB_texture_rectangle support.
+ */
+void
+_mesa_meta_blit_framebuffer(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter)
+{
+ struct blit_state *blit = &ctx->Meta->Blit;
+ const GLint srcX = MIN2(srcX0, srcX1);
+ const GLint srcY = MIN2(srcY0, srcY1);
+ const GLint srcW = abs(srcX1 - srcX0);
+ const GLint srcH = abs(srcY1 - srcY0);
+ GLboolean srcFlipX = srcX1 < srcX0;
+ GLboolean srcFlipY = srcY1 < srcY0;
+
+ ASSERT(ctx->Extensions.NV_texture_rectangle);
+
+ if (srcW > ctx->Const.MaxTextureRectSize ||
+ srcH > ctx->Const.MaxTextureRectSize) {
+ /* XXX avoid this fallback */
+ _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1, mask, filter);
+ return;
+ }
+
+
+ if (srcFlipX) {
+ GLint tmp = dstX0;
+ dstX0 = dstX1;
+ dstX1 = tmp;
+ }
+
+ if (srcFlipY) {
+ GLint tmp = dstY0;
+ dstY0 = dstY1;
+ dstY1 = tmp;
+ }
+
+ /* only scissor effects blit so save/clear all other relevant state */
+ _mesa_meta_begin(ctx, ~META_SCISSOR);
+
+ if (blit->TexObj == 0) {
+ /* one-time setup */
+
+ /* create texture object */
+ _mesa_GenTextures(1, &blit->TexObj);
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, blit->TexObj);
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ }
+ else {
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, blit->TexObj);
+ }
+
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter);
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter);
+
+ if (blit->ArrayObj == 0) {
+ /* one-time setup */
+
+ /* create vertex array object */
+ _mesa_GenVertexArrays(1, &blit->ArrayObj);
+ _mesa_BindVertexArray(blit->ArrayObj);
+
+ /* create vertex array buffer */
+ _mesa_GenBuffersARB(1, &blit->VBO);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO);
+ _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(blit->verts),
+ blit->verts, GL_STREAM_DRAW_ARB);
+
+ /* setup vertex arrays */
+ _mesa_VertexPointer(2, GL_FLOAT, 4 * sizeof(GLfloat),
+ (void*) (0 * sizeof(GLfloat)));
+ _mesa_TexCoordPointer(2, GL_FLOAT, 4 * sizeof(GLfloat),
+ (void *) (2 * sizeof(GLfloat)));
+ _mesa_EnableClientState(GL_VERTEX_ARRAY);
+ _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+ }
+ else {
+ _mesa_BindVertexArray(blit->ArrayObj);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO);
+ }
+
+ /* vertex positions */
+ blit->verts[0][0] = (GLfloat) dstX0;
+ blit->verts[0][1] = (GLfloat) dstY0;
+ blit->verts[1][0] = (GLfloat) dstX1;
+ blit->verts[1][1] = (GLfloat) dstY0;
+ blit->verts[2][0] = (GLfloat) dstX1;
+ blit->verts[2][1] = (GLfloat) dstY1;
+ blit->verts[3][0] = (GLfloat) dstX0;
+ blit->verts[3][1] = (GLfloat) dstY1;
+
+ /* texcoords */
+ blit->verts[0][2] = 0.0F;
+ blit->verts[0][3] = 0.0F;
+ blit->verts[1][2] = (GLfloat) srcW;
+ blit->verts[1][3] = 0.0F;
+ blit->verts[2][2] = (GLfloat) srcW;
+ blit->verts[2][3] = (GLfloat) srcH;
+ blit->verts[3][2] = 0.0F;
+ blit->verts[3][3] = (GLfloat) srcH;
+
+ /* upload new vertex data */
+ _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0,
+ sizeof(blit->verts), blit->verts);
+
+ /* copy framebuffer image to texture */
+ if (mask & GL_COLOR_BUFFER_BIT) {
+ if (blit->TexWidth == srcW &&
+ blit->TexHeight == srcH &&
+ blit->TexType == GL_RGBA) {
+ /* replace existing tex image */
+ _mesa_CopyTexSubImage2D(GL_TEXTURE_RECTANGLE, 0,
+ 0, 0, srcX, srcY, srcW, srcH);
+ }
+ else {
+ /* create new tex image */
+ _mesa_CopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA,
+ srcX, srcY, srcW, srcH, 0);
+ blit->TexWidth = srcW;
+ blit->TexHeight = srcH;
+ blit->TexType = GL_RGBA;
+ }
+
+ mask &= ~GL_COLOR_BUFFER_BIT;
+ }
+
+ _mesa_Enable(GL_TEXTURE_RECTANGLE);
+
+ /* draw textured quad */
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ _mesa_Disable(GL_TEXTURE_RECTANGLE);
+
+ _mesa_meta_end(ctx);
+
+ /* XXX, TO-DO: try to handle these cases above! */
+ if (mask & (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)) {
+ _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1, mask, filter);
+ }
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
+ */
+void
+_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers)
+{
+ struct clear_state *clear = &ctx->Meta->Clear;
+ GLfloat z = 1.0 - 2.0 * ctx->Depth.Clear;
+ GLuint i;
+
+ /* only scissor and color mask effects clearing */
+ _mesa_meta_begin(ctx, ~(META_SCISSOR | META_COLOR_MASK));
+
+ if (clear->ArrayObj == 0) {
+ /* one-time setup */
+
+ /* create vertex array object */
+ _mesa_GenVertexArrays(1, &clear->ArrayObj);
+ _mesa_BindVertexArray(clear->ArrayObj);
+
+ /* create vertex array buffer */
+ _mesa_GenBuffersARB(1, &clear->VBO);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+ _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(clear->verts),
+ clear->verts, GL_STREAM_DRAW_ARB);
+
+ /* setup vertex arrays */
+ _mesa_VertexPointer(3, GL_FLOAT, 7 * sizeof(GLfloat), (void *) 0);
+ _mesa_ColorPointer(4, GL_FLOAT, 7 * sizeof(GLfloat),
+ (void *) (3 * sizeof(GLfloat)));
+ _mesa_EnableClientState(GL_VERTEX_ARRAY);
+ _mesa_EnableClientState(GL_COLOR_ARRAY);
+ }
+ else {
+ _mesa_BindVertexArray(clear->ArrayObj);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+ }
+
+ /* GL_COLOR_BUFFER_BIT */
+ if (buffers & BUFFER_BITS_COLOR) {
+ /* leave colormask, glDrawBuffer state as-is */
+ }
+ else {
+ _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+ }
+
+ /* GL_DEPTH_BUFFER_BIT */
+ if (buffers & BUFFER_BIT_DEPTH) {
+ _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+ _mesa_DepthFunc(GL_ALWAYS);
+ _mesa_DepthMask(GL_TRUE);
+ }
+ else {
+ assert(!ctx->Depth.Test);
+ }
+
+ /* GL_STENCIL_BUFFER_BIT */
+ if (buffers & BUFFER_BIT_STENCIL) {
+ _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+ _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+ GL_REPLACE, GL_REPLACE, GL_REPLACE);
+ _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+ ctx->Stencil.Clear & 0x7fffffff,
+ ctx->Stencil.WriteMask[0]);
+ }
+ else {
+ assert(!ctx->Stencil.Enabled);
+ }
+
+ /* vertex positions */
+ clear->verts[0][0] = (GLfloat) ctx->DrawBuffer->_Xmin;
+ clear->verts[0][1] = (GLfloat) ctx->DrawBuffer->_Ymin;
+ clear->verts[0][2] = z;
+ clear->verts[1][0] = (GLfloat) ctx->DrawBuffer->_Xmax;
+ clear->verts[1][1] = (GLfloat) ctx->DrawBuffer->_Ymin;
+ clear->verts[1][2] = z;
+ clear->verts[2][0] = (GLfloat) ctx->DrawBuffer->_Xmax;
+ clear->verts[2][1] = (GLfloat) ctx->DrawBuffer->_Ymax;
+ clear->verts[2][2] = z;
+ clear->verts[3][0] = (GLfloat) ctx->DrawBuffer->_Xmin;
+ clear->verts[3][1] = (GLfloat) ctx->DrawBuffer->_Ymax;
+ clear->verts[3][2] = z;
+
+ /* vertex colors */
+ for (i = 0; i < 4; i++) {
+ COPY_4FV(&clear->verts[i][3], ctx->Color.ClearColor);
+ }
+
+ /* upload new vertex data */
+ _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0,
+ sizeof(clear->verts), clear->verts);
+
+ /* draw quad */
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ _mesa_meta_end(ctx);
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.CopyPixels() in terms
+ * of texture mapping and polygon rendering.
+ * Note: this function requires GL_ARB_texture_rectangle support.
+ */
+void
+_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY,
+ GLsizei width, GLsizei height,
+ GLint dstX, GLint dstY, GLenum type)
+{
+ const GLenum filter = GL_NEAREST;
+ struct copypix_state *copypix = &ctx->Meta->CopyPix;
+ const GLfloat z = ctx->Current.RasterPos[2];
+ const GLfloat dstX1 = dstX + width * ctx->Pixel.ZoomX;
+ const GLfloat dstY1 = dstY + height * ctx->Pixel.ZoomY;
+
+ ASSERT(ctx->Extensions.NV_texture_rectangle);
+
+ if (type != GL_COLOR ||
+ ctx->_ImageTransferState ||
+ ctx->Fog.Enabled ||
+ width > ctx->Const.MaxTextureRectSize ||
+ height > ctx->Const.MaxTextureRectSize) {
+ /* XXX avoid this fallback */
+ _swrast_CopyPixels(ctx, srcX, srcY, width, height, dstX, dstY, type);
+ return;
+ }
+
+ /* Most GL state applies to glCopyPixels, but a there's a few things
+ * we need to override:
+ */
+ _mesa_meta_begin(ctx, (META_RASTERIZATION |
+ META_SHADER |
+ META_TEXTURE |
+ META_TRANSFORM |
+ META_VERTEX |
+ META_VIEWPORT));
+
+ if (copypix->TexObj == 0) {
+ /* one-time setup */
+
+ /* create texture object */
+ _mesa_GenTextures(1, &copypix->TexObj);
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, copypix->TexObj);
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter);
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter);
+ }
+ else {
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, copypix->TexObj);
+ }
+
+ if (copypix->ArrayObj == 0) {
+ /* one-time setup */
+
+ /* create vertex array object */
+ _mesa_GenVertexArrays(1, &copypix->ArrayObj);
+ _mesa_BindVertexArray(copypix->ArrayObj);
+
+ /* create vertex array buffer */
+ _mesa_GenBuffersARB(1, &copypix->VBO);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO);
+ _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(copypix->verts),
+ copypix->verts, GL_STREAM_DRAW_ARB);
+
+ /* setup vertex arrays */
+ _mesa_VertexPointer(3, GL_FLOAT, sizeof(copypix->verts[0]),
+ (void*) (0 * sizeof(GLfloat)));
+ _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(copypix->verts[0]),
+ (void *) (3 * sizeof(GLfloat)));
+ _mesa_EnableClientState(GL_VERTEX_ARRAY);
+ _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+ }
+ else {
+ _mesa_BindVertexArray(copypix->ArrayObj);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO);
+ }
+
+ /* vertex positions, texcoords */
+ copypix->verts[0][0] = (GLfloat) dstX;
+ copypix->verts[0][1] = (GLfloat) dstY;
+ copypix->verts[0][2] = z;
+ copypix->verts[0][3] = 0.0F;
+ copypix->verts[0][4] = 0.0F;
+ copypix->verts[1][0] = (GLfloat) dstX1;
+ copypix->verts[1][1] = (GLfloat) dstY;
+ copypix->verts[1][2] = z;
+ copypix->verts[1][3] = (GLfloat) width;
+ copypix->verts[1][4] = 0.0F;
+ copypix->verts[2][0] = (GLfloat) dstX1;
+ copypix->verts[2][1] = (GLfloat) dstY1;
+ copypix->verts[2][2] = z;
+ copypix->verts[2][3] = (GLfloat) width;
+ copypix->verts[2][4] = (GLfloat) height;
+ copypix->verts[3][0] = (GLfloat) dstX;
+ copypix->verts[3][1] = (GLfloat) dstY1;
+ copypix->verts[3][2] = z;
+ copypix->verts[3][3] = 0.0F;
+ copypix->verts[3][4] = (GLfloat) height;
+
+ /* upload new vertex data */
+ _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0,
+ sizeof(copypix->verts), copypix->verts);
+
+ /* copy framebuffer image to texture */
+ if (type == GL_COLOR) {
+ if (copypix->TexWidth == width &&
+ copypix->TexHeight == height &&
+ copypix->TexType == type) {
+ /* replace existing tex image */
+ _mesa_CopyTexSubImage2D(GL_TEXTURE_RECTANGLE, 0,
+ 0, 0, srcX, srcY, width, height);
+ }
+ else {
+ /* create new tex image */
+ _mesa_CopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA,
+ srcX, srcY, width, height, 0);
+ copypix->TexWidth = width;
+ copypix->TexHeight = height;
+ copypix->TexType = type;
+ }
+ }
+ else if (type == GL_DEPTH) {
+ /* TO-DO: Use a GL_DEPTH_COMPONENT texture and a fragment program/shader
+ * that replaces the fragment.z value.
+ */
+ }
+ else {
+ ASSERT(type == GL_STENCIL);
+ /* have to use sw fallback */
+ }
+
+ _mesa_Enable(GL_TEXTURE_RECTANGLE);
+
+ /* draw textured quad */
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ _mesa_Disable(GL_TEXTURE_RECTANGLE);
+
+ _mesa_meta_end(ctx);
+}
+
+
+
+/**
+ * When the glDrawPixels() image size is greater than the max rectangle
+ * texture size we use this function to break the glDrawPixels() image
+ * into tiles which fit into the max texture size.
+ */
+static void
+tiled_draw_pixels(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
+{
+ const GLint maxSize = ctx->Const.MaxTextureRectSize;
+ struct gl_pixelstore_attrib tileUnpack = *unpack;
+ GLint i, j;
+
+ for (i = 0; i < width; i += maxSize) {
+ const GLint tileWidth = MIN2(maxSize, width - i);
+ const GLint tileX = (GLint) (x + i * ctx->Pixel.ZoomX);
+
+ tileUnpack.SkipPixels = unpack->SkipPixels + i;
+
+ for (j = 0; j < height; j += maxSize) {
+ const GLint tileHeight = MIN2(maxSize, height - j);
+ const GLint tileY = (GLint) (y + j * ctx->Pixel.ZoomY);
+
+ tileUnpack.SkipRows = unpack->SkipRows + j;
+
+ _mesa_meta_draw_pixels(ctx, tileX, tileY,
+ tileWidth, tileHeight,
+ format, type, &tileUnpack, pixels);
+ }
+ }
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.DrawPixels() in terms
+ * of texture mapping and polygon rendering.
+ * Note: this function requires GL_ARB_texture_rectangle support.
+ */
+void
+_mesa_meta_draw_pixels(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
+{
+ const GLenum filter = GL_NEAREST;
+ struct drawpix_state *drawpix = &ctx->Meta->DrawPix;
+ const GLfloat z = ctx->Current.RasterPos[2];
+ const GLfloat x1 = x + width * ctx->Pixel.ZoomX;
+ const GLfloat y1 = y + height * ctx->Pixel.ZoomY;
+ const struct gl_pixelstore_attrib unpackSave = ctx->Unpack;
+ GLenum texIntFormat;
+ GLboolean fallback;
+
+ ASSERT(ctx->Extensions.NV_texture_rectangle);
+
+ /*
+ * Determine if we can do the glDrawPixels with texture mapping.
+ */
+ fallback = GL_FALSE;
+ if (ctx->_ImageTransferState ||
+ ctx->Fog.Enabled) {
+ fallback = GL_TRUE;
+ }
+
+ if (_mesa_is_color_format(format)) {
+ texIntFormat = GL_RGBA;
+ }
+ else {
+ fallback = GL_TRUE;
+ }
+
+ if (fallback) {
+ _swrast_DrawPixels(ctx, x, y, width, height,
+ format, type, unpack, pixels);
+ return;
+ }
+
+ /*
+ * Check image size against max texture size, draw as tiles if needed.
+ */
+ if (width > ctx->Const.MaxTextureRectSize ||
+ height > ctx->Const.MaxTextureRectSize) {
+ tiled_draw_pixels(ctx, x, y, width, height,
+ format, type, unpack, pixels);
+ return;
+ }
+
+ /* Most GL state applies to glDrawPixels, but a there's a few things
+ * we need to override:
+ */
+ _mesa_meta_begin(ctx, (META_RASTERIZATION |
+ META_SHADER |
+ META_TEXTURE |
+ META_TRANSFORM |
+ META_VERTEX |
+ META_VIEWPORT));
+
+ if (drawpix->TexObj == 0) {
+ /* one-time setup */
+
+ /* create texture object */
+ _mesa_GenTextures(1, &drawpix->TexObj);
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, drawpix->TexObj);
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter);
+ _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter);
+ }
+ else {
+ _mesa_BindTexture(GL_TEXTURE_RECTANGLE, drawpix->TexObj);
+ }
+
+ if (drawpix->ArrayObj == 0) {
+ /* one-time setup */
+
+ /* create vertex array object */
+ _mesa_GenVertexArrays(1, &drawpix->ArrayObj);
+ _mesa_BindVertexArray(drawpix->ArrayObj);
+
+ /* create vertex array buffer */
+ _mesa_GenBuffersARB(1, &drawpix->VBO);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, drawpix->VBO);
+ _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(drawpix->verts),
+ drawpix->verts, GL_STREAM_DRAW_ARB);
+
+ /* setup vertex arrays */
+ _mesa_VertexPointer(3, GL_FLOAT, sizeof(drawpix->verts[0]),
+ (void*) (0 * sizeof(GLfloat)));
+ _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(drawpix->verts[0]),
+ (void *) (3 * sizeof(GLfloat)));
+ _mesa_EnableClientState(GL_VERTEX_ARRAY);
+ _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+ }
+ else {
+ _mesa_BindVertexArray(drawpix->ArrayObj);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, drawpix->VBO);
+ }
+
+ /* vertex positions, texcoords */
+ drawpix->verts[0][0] = (GLfloat) x;
+ drawpix->verts[0][1] = (GLfloat) y;
+ drawpix->verts[0][2] = z;
+ drawpix->verts[0][3] = 0.0F;
+ drawpix->verts[0][4] = 0.0F;
+ drawpix->verts[1][0] = (GLfloat) x1;
+ drawpix->verts[1][1] = (GLfloat) y;
+ drawpix->verts[1][2] = z;
+ drawpix->verts[1][3] = (GLfloat) width;
+ drawpix->verts[1][4] = 0.0F;
+ drawpix->verts[2][0] = (GLfloat) x1;
+ drawpix->verts[2][1] = (GLfloat) y1;
+ drawpix->verts[2][2] = z;
+ drawpix->verts[2][3] = (GLfloat) width;
+ drawpix->verts[2][4] = (GLfloat) height;
+ drawpix->verts[3][0] = (GLfloat) x;
+ drawpix->verts[3][1] = (GLfloat) y1;
+ drawpix->verts[3][2] = z;
+ drawpix->verts[3][3] = 0.0F;
+ drawpix->verts[3][4] = (GLfloat) height;
+
+ /* upload new vertex data */
+ _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0,
+ sizeof(drawpix->verts), drawpix->verts);
+
+ /* set given unpack params */
+ ctx->Unpack = *unpack; /* XXX bufobj */
+
+ /* copy pixel data to texture */
+ if (drawpix->TexWidth == width &&
+ drawpix->TexHeight == height &&
+ drawpix->TexIntFormat == texIntFormat) {
+ /* replace existing tex image */
+ _mesa_TexSubImage2D(GL_TEXTURE_RECTANGLE, 0,
+ 0, 0, width, height, format, type, pixels);
+ }
+ else {
+ /* create new tex image */
+ _mesa_TexImage2D(GL_TEXTURE_RECTANGLE, 0, texIntFormat,
+ width, height, 0, format, type, pixels);
+ drawpix->TexWidth = width;
+ drawpix->TexHeight = height;
+ drawpix->TexIntFormat = texIntFormat;
+ }
+
+ /* restore unpack params */
+ ctx->Unpack = unpackSave; /* XXX bufobj */
+
+ _mesa_Enable(GL_TEXTURE_RECTANGLE);
+
+ /* draw textured quad */
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ _mesa_Disable(GL_TEXTURE_RECTANGLE);
+
+ _mesa_meta_end(ctx);
+}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
new file mode 100644
index 00000000000..a9c5f980432
--- /dev/null
+++ b/src/mesa/drivers/common/meta.h
@@ -0,0 +1,80 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.6
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef META_H
+#define META_H
+
+
+/**
+ * Flags passed to _mesa_meta_begin().
+ * XXX these flags may evolve...
+ */
+/*@{*/
+#define META_ALPHA_TEST 0x1
+#define META_BLEND 0x2 /**< includes logicop */
+#define META_COLOR_MASK 0x4
+#define META_DEPTH_TEST 0x8
+#define META_FOG 0x10
+#define META_RASTERIZATION 0x20
+#define META_SCISSOR 0x40
+#define META_SHADER 0x80
+#define META_STENCIL_TEST 0x100
+#define META_TRANSFORM 0x200 /**< modelview, projection */
+#define META_TEXTURE 0x400
+#define META_VERTEX 0x800
+#define META_VIEWPORT 0x1000
+#define META_ALL ~0x0
+/*@}*/
+
+
+extern void
+_mesa_meta_init(GLcontext *ctx);
+
+extern void
+_mesa_meta_free(GLcontext *ctx);
+
+extern void
+_mesa_meta_blit_framebuffer(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter);
+
+extern void
+_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers);
+
+extern void
+_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
+ GLsizei width, GLsizei height,
+ GLint dstx, GLint dsty, GLenum type);
+
+extern void
+_mesa_meta_draw_pixels(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels);
+
+
+#endif /* META_H */
diff --git a/src/mesa/drivers/dri/common/.gitignore b/src/mesa/drivers/dri/common/.gitignore
new file mode 100644
index 00000000000..1edeb79fd12
--- /dev/null
+++ b/src/mesa/drivers/dri/common/.gitignore
@@ -0,0 +1 @@
+*.os
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
index fe183c2e870..cdbea344951 100644
--- a/src/mesa/drivers/dri/common/dri_metaops.c
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -357,6 +357,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask)
GLuint saved_shader_program = 0;
unsigned int saved_active_texture;
struct gl_array_object *arraySave = NULL;
+ GLfloat saved_near, saved_far;
if (!meta->clear.arrayObj)
meta_init_clear(meta);
@@ -370,8 +371,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask)
GL_POLYGON_BIT |
GL_STENCIL_BUFFER_BIT |
GL_TRANSFORM_BIT |
- GL_CURRENT_BIT |
- GL_VIEWPORT_BIT);
+ GL_CURRENT_BIT);
saved_active_texture = ctx->Texture.CurrentUnit;
/* Disable existing GL state we don't want to apply to a clear. */
@@ -440,6 +440,8 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask)
/* The ClearDepth value is unaffected by DepthRange, so do a default
* mapping.
*/
+ saved_near = ctx->Viewport.Near;
+ saved_far = ctx->Viewport.Far;
_mesa_DepthRange(0.0, 1.0);
/* Prepare the vertices, which are the same regardless of which buffer we're
@@ -519,6 +521,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask)
if (saved_shader_program)
_mesa_UseProgramObjectARB(saved_shader_program);
+ _mesa_DepthRange(saved_near, saved_far);
_mesa_PopAttrib();
/* restore current array object */
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index 1d940603fa8..e48e10d7c06 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -778,7 +778,7 @@ dri2CreateNewScreen(int scrn, int fd,
if (driDriverAPI.InitScreen2 == NULL)
return NULL;
- psp = _mesa_malloc(sizeof(*psp));
+ psp = _mesa_calloc(sizeof(*psp));
if (!psp)
return NULL;
diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h
index e308fd28311..08a97bb1110 100644
--- a/src/mesa/drivers/dri/common/extension_helper.h
+++ b/src/mesa/drivers/dri/common/extension_helper.h
@@ -752,6 +752,13 @@ static const char VertexAttrib4ubNV_names[] =
"";
#endif
+#if defined(need_GL_APPLE_texture_range)
+static const char TextureRangeAPPLE_names[] =
+ "iip\0" /* Parameter signature */
+ "glTextureRangeAPPLE\0"
+ "";
+#endif
+
#if defined(need_GL_SUN_vertex)
static const char TexCoord2fColor4fNormal3fVertex3fSUN_names[] =
"ffffffffffff\0" /* Parameter signature */
@@ -1567,6 +1574,13 @@ static const char FragmentLightivSGIX_names[] =
"";
#endif
+#if defined(need_GL_APPLE_texture_range)
+static const char GetTexParameterPointervAPPLE_names[] =
+ "iip\0" /* Parameter signature */
+ "glGetTexParameterPointervAPPLE\0"
+ "";
+#endif
+
#if defined(need_GL_EXT_pixel_transform)
static const char PixelTransformParameterfvEXT_names[] =
"iip\0" /* Parameter signature */
@@ -2079,6 +2093,13 @@ static const char Uniform2fvARB_names[] =
"";
#endif
+#if defined(need_GL_APPLE_flush_buffer_range)
+static const char BufferParameteriAPPLE_names[] =
+ "iii\0" /* Parameter signature */
+ "glBufferParameteriAPPLE\0"
+ "";
+#endif
+
#if defined(need_GL_VERSION_1_3)
static const char MultiTexCoord3dvARB_names[] =
"ip\0" /* Parameter signature */
@@ -2803,6 +2824,13 @@ static const char IsProgramNV_names[] =
"";
#endif
+#if defined(need_GL_APPLE_flush_buffer_range)
+static const char FlushMappedBufferRangeAPPLE_names[] =
+ "iii\0" /* Parameter signature */
+ "glFlushMappedBufferRangeAPPLE\0"
+ "";
+#endif
+
#if defined(need_GL_SUN_triangle_list)
static const char ReplacementCodePointerSUN_names[] =
"iip\0" /* Parameter signature */
@@ -4973,6 +5001,22 @@ static const struct dri_extension_function GL_3DFX_tbuffer_functions[] = {
};
#endif
+#if defined(need_GL_APPLE_flush_buffer_range)
+static const struct dri_extension_function GL_APPLE_flush_buffer_range_functions[] = {
+ { BufferParameteriAPPLE_names, BufferParameteriAPPLE_remap_index, -1 },
+ { FlushMappedBufferRangeAPPLE_names, FlushMappedBufferRangeAPPLE_remap_index, -1 },
+ { NULL, 0, 0 }
+};
+#endif
+
+#if defined(need_GL_APPLE_texture_range)
+static const struct dri_extension_function GL_APPLE_texture_range_functions[] = {
+ { TextureRangeAPPLE_names, TextureRangeAPPLE_remap_index, -1 },
+ { GetTexParameterPointervAPPLE_names, GetTexParameterPointervAPPLE_remap_index, -1 },
+ { NULL, 0, 0 }
+};
+#endif
+
#if defined(need_GL_APPLE_vertex_array_object)
static const struct dri_extension_function GL_APPLE_vertex_array_object_functions[] = {
{ DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 },
diff --git a/src/mesa/drivers/dri/fb/fb_egl.c b/src/mesa/drivers/dri/fb/fb_egl.c
index dee67feb5ac..4e41860d8c8 100644
--- a/src/mesa/drivers/dri/fb/fb_egl.c
+++ b/src/mesa/drivers/dri/fb/fb_egl.c
@@ -605,7 +605,7 @@ fbDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
{
fbSurface *fs = Lookup_fbSurface(surface);
_eglUnlinkSurface(&fs->Base);
- if (!fs->Base.IsBound)
+ if (!_eglIsSurfaceBound(&fs->Base))
free(fs);
return EGL_TRUE;
}
@@ -616,7 +616,7 @@ fbDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext context)
{
fbContext *fc = Lookup_fbContext(context);
_eglUnlinkContext(&fc->Base);
- if (!fc->Base.IsBound)
+ if (!_eglIsContextBound(&fc->Base))
free(fc);
return EGL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h
index 1bdb32049d7..f73cbbf88bb 100644
--- a/src/mesa/drivers/dri/i915/i830_context.h
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -40,6 +40,7 @@
#define I830_UPLOAD_BUFFERS 0x2
#define I830_UPLOAD_STIPPLE 0x4
#define I830_UPLOAD_INVARIENT 0x8
+#define I830_UPLOAD_RASTER_RULES 0x10
#define I830_UPLOAD_TEX(i) (0x10<<(i))
#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
#define I830_UPLOAD_TEX_ALL (0x0f0)
@@ -99,6 +100,11 @@
#define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */
+enum {
+ I830_RASTER_RULES,
+ I830_RASTER_RULES_SIZE
+};
+
struct i830_texture_object
{
struct intel_texture_object intel;
@@ -112,6 +118,7 @@ struct i830_hw_state
GLuint Ctx[I830_CTX_SETUP_SIZE];
GLuint Buffer[I830_DEST_SETUP_SIZE];
GLuint Stipple[I830_STP_SETUP_SIZE];
+ GLuint RasterRules[I830_RASTER_RULES_SIZE];
GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];
GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];
GLuint TexBlendWordsUsed[I830_TEX_UNITS];
@@ -197,6 +204,7 @@ extern void i830InitStateFuncs(struct dd_function_table *functions);
extern void i830EmitState(struct i830_context *i830);
extern void i830InitState(struct i830_context *i830);
+extern void i830_update_provoking_vertex(GLcontext *ctx);
/* i830_metaops.c
*/
diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h
index db16871001d..ae1317029a2 100644
--- a/src/mesa/drivers/dri/i915/i830_reg.h
+++ b/src/mesa/drivers/dri/i915/i830_reg.h
@@ -420,8 +420,11 @@
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2)
+#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0)
#define TRI_STRIP_PROVOKE_VRTX(x) (x)
/* _3DSTATE_SCISSOR_ENABLE, p200 */
diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c
index 8ef6c9144f1..645ebe30577 100644
--- a/src/mesa/drivers/dri/i915/i830_state.c
+++ b/src/mesa/drivers/dri/i915/i830_state.c
@@ -1047,6 +1047,16 @@ i830_init_packets(struct i830_context *i830)
TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+ i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ ENABLE_TRI_STRIP_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ TRI_STRIP_PROVOKE_VRTX(2));
+
i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
@@ -1058,6 +1068,27 @@ i830_init_packets(struct i830_context *i830)
i830->state.Buffer[I830_DESTREG_SR2] = 0;
}
+void
+i830_update_provoking_vertex(GLcontext * ctx)
+{
+ struct i830_context *i830 = i830_context(ctx);
+
+ I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES);
+ i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
+ TRI_FAN_PROVOKE_VRTX_MASK |
+ TRI_STRIP_PROVOKE_VRTX_MASK);
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) |
+ TRI_STRIP_PROVOKE_VRTX(2));
+ } else {
+ i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
+ TRI_FAN_PROVOKE_VRTX(1) |
+ TRI_STRIP_PROVOKE_VRTX(0));
+ }
+}
void
i830InitStateFuncs(struct dd_function_table *functions)
@@ -1101,6 +1132,7 @@ i830InitState(struct i830_context *i830)
i830->current = &i830->state;
i830->state.emitted = 0;
i830->state.active = (I830_UPLOAD_INVARIENT |
+ I830_UPLOAD_RASTER_RULES |
I830_UPLOAD_TEXBLEND(0) |
I830_UPLOAD_STIPPLE |
I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS);
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 9c6f891dd32..983f6724c98 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -299,7 +299,7 @@ i830_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(30, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(29, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
@@ -351,15 +351,6 @@ i830_emit_invarient_state(struct intel_context *intel)
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- ENABLE_TRI_STRIP_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2));
-
OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
@@ -394,6 +385,9 @@ get_state_size(struct i830_hw_state *state)
if (dirty & I830_UPLOAD_INVARIENT)
sz += 40 * sizeof(int);
+ if (dirty & I830_UPLOAD_RASTER_RULES)
+ sz += sizeof(state->RasterRules);
+
if (dirty & I830_UPLOAD_CTX)
sz += sizeof(state->Ctx);
@@ -486,6 +480,11 @@ i830_emit_state(struct intel_context *intel)
i830_emit_invarient_state(intel);
}
+ if (dirty & I830_UPLOAD_RASTER_RULES) {
+ DBG("I830_UPLOAD_RASTER_RULES:\n");
+ emit(intel, state->RasterRules, sizeof(state->RasterRules));
+ }
+
if (dirty & I830_UPLOAD_CTX) {
DBG("I830_UPLOAD_CTX:\n");
emit(intel, state->Ctx, sizeof(state->Ctx));
@@ -737,6 +736,13 @@ i830_assert_not_dirty( struct intel_context *intel )
assert(!get_dirty(state));
}
+static void
+i830_invalidate_state(struct intel_context *intel, GLuint new_state)
+{
+ if (new_state & _NEW_LIGHT)
+ i830_update_provoking_vertex(&intel->ctx);
+}
+
void
i830InitVtbl(struct i830_context *i830)
{
@@ -752,4 +758,5 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
i830->intel.vtbl.finish_batch = intel_finish_vb;
+ i830->intel.vtbl.invalidate_state = i830_invalidate_state;
}
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 367d2a3b648..bb08cf8d18f 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -75,6 +75,10 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
i915_update_fog(ctx);
+ if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON))
+ i915_update_stencil(ctx);
+ if (new_state & (_NEW_LIGHT))
+ i915_update_provoking_vertex(ctx);
}
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index 87bbf5f9271..8de4a9d0d36 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -48,6 +48,7 @@
#define I915_UPLOAD_FOG 0x20
#define I915_UPLOAD_INVARIENT 0x40
#define I915_UPLOAD_DEFAULTS 0x80
+#define I915_UPLOAD_RASTER_RULES 0x100
#define I915_UPLOAD_TEX(i) (0x00010000<<(i))
#define I915_UPLOAD_TEX_ALL (0x00ff0000)
#define I915_UPLOAD_TEX_0_SHIFT 16
@@ -82,7 +83,9 @@
#define I915_CTXREG_IAB 6
#define I915_CTXREG_BLENDCOLOR0 7
#define I915_CTXREG_BLENDCOLOR1 8
-#define I915_CTX_SETUP_SIZE 9
+#define I915_CTXREG_BF_STENCIL_OPS 9
+#define I915_CTXREG_BF_STENCIL_MASKS 10
+#define I915_CTX_SETUP_SIZE 11
#define I915_FOGREG_COLOR 0
#define I915_FOGREG_MODE0 1
@@ -110,6 +113,10 @@
#define I915_DEFREG_Z1 5
#define I915_DEF_SETUP_SIZE 6
+enum {
+ I915_RASTER_RULES,
+ I915_RASTER_RULES_SETUP_SIZE,
+};
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
@@ -206,6 +213,7 @@ struct i915_hw_state
GLuint Stipple[I915_STP_SETUP_SIZE];
GLuint Fog[I915_FOG_SETUP_SIZE];
GLuint Defaults[I915_DEF_SETUP_SIZE];
+ GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE];
GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];
GLuint Constant[I915_CONSTANT_SIZE];
GLuint ConstantSize;
@@ -321,6 +329,8 @@ extern void i915_print_ureg(const char *msg, GLuint ureg);
extern void i915InitStateFunctions(struct dd_function_table *functions);
extern void i915InitState(struct i915_context *i915);
extern void i915_update_fog(GLcontext * ctx);
+extern void i915_update_stencil(GLcontext * ctx);
+extern void i915_update_provoking_vertex(GLcontext *ctx);
/*======================================================================
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
index 84db58ea950..b5fa7fddb96 100644
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -86,8 +86,10 @@
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff)
@@ -295,7 +297,9 @@
#define TEXKILL_4D (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
/* _3DSTATE_SCISSOR_ENABLE, p256 */
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index 814fb59fd34..b60efea75bd 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -48,73 +48,119 @@
#define FILE_DEBUG_FLAG DEBUG_STATE
-static void
-i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
- GLuint mask)
+void
+i915_update_stencil(GLcontext * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
- int test = intel_translate_compare_func(func);
+ GLuint front_ref, front_writemask, front_mask;
+ GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass;
+ GLuint back_ref, back_writemask, back_mask;
+ GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass;
- mask = mask & 0xff;
-
- DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(func), ref, mask);
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ /* The 915 considers CW to be "front" for two-sided stencil, so choose
+ * appropriately.
+ */
+ /* _NEW_POLYGON | _NEW_STENCIL */
+ if (ctx->Polygon.FrontFace == GL_CW) {
+ front_ref = ctx->Stencil.Ref[0];
+ front_mask = ctx->Stencil.ValueMask[0];
+ front_writemask = ctx->Stencil.WriteMask[0];
+ front_func = ctx->Stencil.Function[0];
+ front_fail = ctx->Stencil.FailFunc[0];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ } else {
+ front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+ front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+ front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+ front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+ front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+ front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+ front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+ back_ref = ctx->Stencil.Ref[0];
+ back_mask = ctx->Stencil.ValueMask[0];
+ back_writemask = ctx->Stencil.WriteMask[0];
+ back_func = ctx->Stencil.Function[0];
+ back_fail = ctx->Stencil.FailFunc[0];
+ back_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+ back_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+ }
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+ /* Set front state. */
+ i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK |
+ MODE4_ENABLE_STENCIL_WRITE_MASK);
i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
- STENCIL_TEST_MASK(mask));
+ ENABLE_STENCIL_WRITE_MASK |
+ STENCIL_TEST_MASK(front_mask) |
+ STENCIL_WRITE_MASK(front_writemask));
i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
- S5_STENCIL_TEST_FUNC_MASK);
+ S5_STENCIL_TEST_FUNC_MASK |
+ S5_STENCIL_FAIL_MASK |
+ S5_STENCIL_PASS_Z_FAIL_MASK |
+ S5_STENCIL_PASS_Z_PASS_MASK);
+
+ i915->state.Ctx[I915_CTXREG_LIS5] |=
+ (front_ref << S5_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) |
+ (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_fail) <<
+ S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(front_pass_z_pass) <<
+ S5_STENCIL_PASS_Z_PASS_SHIFT);
+
+ /* Set back state if different from front. */
+ if (ctx->Stencil._TestTwoSide) {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &=
+ ~(BFO_STENCIL_REF_MASK |
+ BFO_STENCIL_TEST_MASK |
+ BFO_STENCIL_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_FAIL_MASK |
+ BFO_STENCIL_PASS_Z_PASS_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE |
+ (back_ref << BFO_STENCIL_REF_SHIFT) |
+ (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) |
+ (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_fail) <<
+ BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
+ (intel_translate_stencil_op(back_pass_z_pass) <<
+ BFO_STENCIL_PASS_Z_PASS_SHIFT);
+
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &=
+ ~(BFM_STENCIL_TEST_MASK_MASK |
+ BFM_STENCIL_WRITE_MASK_MASK);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |=
+ BFM_STENCIL_TEST_MASK(back_mask) |
+ BFM_STENCIL_WRITE_MASK(back_writemask);
+ } else {
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE;
+ }
+}
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) |
- (test <<
- S5_STENCIL_TEST_FUNC_SHIFT));
+static void
+i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
+ GLuint mask)
+{
}
static void
i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
-
- DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
-
- mask = mask & 0xff;
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
- i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
- i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
- STENCIL_WRITE_MASK(mask));
}
-
static void
i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
GLenum zpass)
{
- struct i915_context *i915 = I915_CONTEXT(ctx);
- int fop = intel_translate_stencil_op(fail);
- int dfop = intel_translate_stencil_op(zfail);
- int dpop = intel_translate_stencil_op(zpass);
-
-
- DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
- _mesa_lookup_enum_by_nr(fail),
- _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass));
-
- I915_STATECHANGE(i915, I915_UPLOAD_CTX);
-
- i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
- S5_STENCIL_PASS_Z_FAIL_MASK |
- S5_STENCIL_PASS_Z_PASS_MASK);
-
- i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) |
- (dfop <<
- S5_STENCIL_PASS_Z_FAIL_SHIFT) |
- (dpop <<
- S5_STENCIL_PASS_Z_PASS_SHIFT));
}
static void
@@ -945,6 +991,17 @@ i915_init_packets(struct i915_context *i915)
_3DSTATE_CONST_BLEND_COLOR_CMD;
i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] =
+ _3DSTATE_BACKFACE_STENCIL_MASKS |
+ BFM_ENABLE_STENCIL_TEST_MASK |
+ BFM_ENABLE_STENCIL_WRITE_MASK |
+ (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) |
+ (0xff << BFM_STENCIL_TEST_MASK_SHIFT);
+ i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] =
+ _3DSTATE_BACKFACE_STENCIL_OPS |
+ BFO_ENABLE_STENCIL_REF |
+ BFO_ENABLE_STENCIL_FUNCS |
+ BFO_ENABLE_STENCIL_TWO_SIDE;
}
{
@@ -976,6 +1033,13 @@ i915_init_packets(struct i915_context *i915)
i915->state.Buffer[I915_DESTREG_SR2] = 0;
}
+ i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D;
#if 0
{
@@ -996,7 +1060,33 @@ i915_init_packets(struct i915_context *i915)
i915->state.active = (I915_UPLOAD_PROGRAM |
I915_UPLOAD_STIPPLE |
I915_UPLOAD_CTX |
- I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT);
+ I915_UPLOAD_BUFFERS |
+ I915_UPLOAD_INVARIENT |
+ I915_UPLOAD_RASTER_RULES);
+}
+
+void
+i915_update_provoking_vertex(GLcontext * ctx)
+{
+ struct i915_context *i915 = I915_CONTEXT(ctx);
+
+ I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+ i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK);
+
+ I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES);
+ i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
+ TRI_FAN_PROVOKE_VRTX_MASK);
+
+ /* _NEW_LIGHT */
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+ i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2));
+ i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT);
+ } else {
+ i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
+ TRI_FAN_PROVOKE_VRTX(1));
+ i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT);
+ }
}
void
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index fe1be93a6d0..9a723d3cd73 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -176,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
- BEGIN_BATCH(20, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(17, IGNORE_CLIPRECTS);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -200,14 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel)
CSB_TCB(3, 3) |
CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
- OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
- ENABLE_POINT_RASTER_RULE |
- OGL_POINT_RASTER_RULE |
- ENABLE_LINE_STRIP_PROVOKE_VRTX |
- ENABLE_TRI_FAN_PROVOKE_VRTX |
- LINE_STRIP_PROVOKE_VRTX(1) |
- TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
-
/* Need to initialize this to zero.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
@@ -225,11 +217,6 @@ i915_emit_invarient_state(struct intel_context *intel)
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
-
- /* Don't support twosided stencil yet */
- OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
- OUT_BATCH(0);
-
ADVANCE_BATCH();
}
@@ -263,6 +250,9 @@ get_state_size(struct i915_hw_state *state)
if (dirty & I915_UPLOAD_INVARIENT)
sz += 30 * 4;
+ if (dirty & I915_UPLOAD_RASTER_RULES)
+ sz += sizeof(state->RasterRules);
+
if (dirty & I915_UPLOAD_CTX)
sz += sizeof(state->Ctx);
@@ -371,6 +361,12 @@ i915_emit_state(struct intel_context *intel)
i915_emit_invarient_state(intel);
}
+ if (dirty & I915_UPLOAD_RASTER_RULES) {
+ if (INTEL_DEBUG & DEBUG_STATE)
+ fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
+ emit(intel, state->RasterRules, sizeof(state->RasterRules));
+ }
+
if (dirty & I915_UPLOAD_CTX) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_CTX:\n");
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 00a42111da0..128afb56866 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -43,6 +43,7 @@ DRIVER_SOURCES = \
brw_clip_util.c \
brw_context.c \
brw_curbe.c \
+ brw_disasm.c \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 54d30a3f422..20a927cf386 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -166,7 +166,8 @@ static void upload_clip_prog(struct brw_context *brw)
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
- if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 12e8548df1f..957df441ab0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -100,6 +100,8 @@ struct brw_clip_compile {
struct brw_reg fixed_planes;
struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
} reg;
/* 3 different ways of expressing vertex size:
@@ -173,4 +175,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c );
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index 9abd0642aa2..048ca620fab 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
c->first_tmp = i;
c->last_tmp = i;
@@ -246,8 +250,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
not_culled = brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
@@ -265,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
void brw_emit_line_clip( struct brw_clip_compile *c )
{
brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
if (c->key.do_flat_shading)
brw_clip_copy_colors(c, 0, 1);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
index 97382991686..8458f61c5a0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_point.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -50,7 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c )
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
+ brw_clip_init_ff_sync(c);
+
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 4c2d655fb10..0efd77225e2 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -119,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
i++;
}
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
c->first_tmp = i;
c->last_tmp = i;
@@ -563,6 +568,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
/* if -ve rhw workaround bit is set,
do cliptest */
@@ -589,8 +595,6 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
else
maybe_do_clip_tri(c);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
brw_clip_tri_emit_polygon(c);
/* Send an empty message to kill the thread:
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index 26950383c1b..ad1bfa435fb 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
assert(c->offset[VERT_RESULT_EDGE]);
@@ -496,8 +497,6 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
}
brw_ENDIF(p, do_clip);
- if (c->need_ff_sync)
- brw_clip_ff_sync(c);
emit_unfilled_primitives(c);
brw_clip_kill_thread(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index e09efc07ed4..5a73abdfee9 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -213,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_compile *p = &c->func;
GLuint start = c->last_mrf;
+ brw_clip_ff_sync(c);
+
assert(!(allocate && eot));
/* Cycle through mrf regs - probably futile as we have to wait for
@@ -263,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
+ brw_clip_ff_sync(c);
/* Send an empty message to kill the thread and release any
* allocated urb entry:
*/
@@ -356,17 +359,38 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *need_ff_sync;
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+ need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+ }
+ brw_ENDIF(p, need_ff_sync);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
struct brw_compile *p = &c->func;
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+
+ brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 57ddf75413a..847c44ed83a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -143,6 +143,7 @@ struct brw_context;
#define BRW_NEW_DEPTH_BUFFER 0x20000
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -438,9 +439,13 @@ struct brw_query_object {
unsigned int count;
};
+
+/**
+ * brw_context is derived from intel_context.
+ */
struct brw_context
{
- struct intel_context intel;
+ struct intel_context intel; /**< base class, must be first field */
GLuint primitive;
GLboolean emit_state_always;
@@ -475,6 +480,9 @@ struct brw_context
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+ struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+ GLuint nr_enabled;
+
#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)
@@ -498,8 +506,15 @@ struct brw_context
*/
const struct _mesa_index_buffer *ib;
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
dri_bo *bo;
unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
} ib;
/* Active vertex program:
@@ -706,6 +721,8 @@ void brw_upload_urb_fence(struct brw_context *brw);
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
+/* brw_disasm.c */
+int brw_disasm (FILE *file, struct brw_instruction *inst);
/*======================================================================
* Inline conversion functions. These are better-typed than the
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d166250b4fe..78d457ad2be 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -471,8 +471,9 @@
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
-#define BRW_CONDITIONAL_C 7
+#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
@@ -512,6 +513,7 @@
#define BRW_OPCODE_RSL 11
#define BRW_OPCODE_ASR 12
#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
#define BRW_OPCODE_JMPI 32
#define BRW_OPCODE_IF 34
#define BRW_OPCODE_IFF 35
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
new file mode 100644
index 00000000000..9fef230507f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+struct {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+char *dest_condmod[16] = {
+};
+
+char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+char *compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+};
+
+char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+char *imm_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [5] = "VF",
+ [5] = "V",
+ [7] = "F"
+};
+
+char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+char *target_function[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+ [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_TAN] = "tan",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+};
+
+char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+char *urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+char *urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+char *urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+char *sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+static int column;
+
+static int string (FILE *file, char *string)
+{
+ fputs (string, file);
+ column += strlen (string);
+ return 0;
+}
+
+static int format (FILE *f, char *format, ...)
+{
+ char buf[1024];
+ va_list args;
+ va_start (args, format);
+
+ vsnprintf (buf, sizeof (buf) - 1, format, args);
+ string (f, buf);
+ return 0;
+}
+
+static int newline (FILE *f)
+{
+ putc ('\n', f);
+ column = 0;
+ return 0;
+}
+
+static int pad (FILE *f, int c)
+{
+ do
+ string (f, " ");
+ while (column < c);
+ return 0;
+}
+
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+ if (!ctrl[id]) {
+ fprintf (file, "*** invalid %s value %d ",
+ name, id);
+ return 1;
+ }
+ if (ctrl[id][0])
+ {
+ if (space && *space)
+ string (file, " ");
+ string (file, ctrl[id]);
+ if (space)
+ *space = 1;
+ }
+ return 0;
+}
+
+static int print_opcode (FILE *file, int id)
+{
+ if (!opcode[id].name) {
+ format (file, "*** invalid opcode value %d ", id);
+ return 1;
+ }
+ string (file, opcode[id].name);
+ return 0;
+}
+
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+ int err = 0;
+ if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (_reg_nr & 0xf0) {
+ case BRW_ARF_NULL:
+ string (file, "null");
+ return -1;
+ case BRW_ARF_ADDRESS:
+ format (file, "a%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ format (file, "acc%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK:
+ format (file, "mask%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK_STACK:
+ format (file, "msd%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_STATE:
+ format (file, "sr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_CONTROL:
+ format (file, "cr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_NOTIFICATION_COUNT:
+ format (file, "n%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_IP:
+ string (file, "ip");
+ return -1;
+ break;
+ default:
+ format (file, "ARF%d", _reg_nr);
+ break;
+ }
+ } else {
+ err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+ format (file, "%d", _reg_nr);
+ }
+ return err;
+}
+
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+ }
+ else
+ {
+ string (file, "g[a0");
+ if (inst->bits1.ia1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ if (inst->bits1.ia1.dest_indirect_offset)
+ format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+ string (file, "]");
+ format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+ }
+ }
+ else
+ {
+ if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da16.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+ }
+ else
+ {
+ err = 1;
+ string (file, "Indirect align16 address mode not supported");
+ }
+ }
+
+ return 0;
+}
+
+static int src_align1_region (FILE *file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+ int err = 0;
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",");
+ err |= control (file, "width", width, _width, NULL);
+ string (file, ",");
+ err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+ string (file, ">");
+ return err;
+}
+
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+ GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, reg_num);
+ if (err == -1)
+ return 0;
+ if (sub_reg_num)
+ format (file, ".%d", sub_reg_num);
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_ia1 (FILE *file,
+ GLuint type,
+ GLuint _reg_file,
+ GLint _addr_imm,
+ GLuint _addr_subreg_nr,
+ GLuint _negate,
+ GLuint __abs,
+ GLuint _addr_mode,
+ GLuint _horiz_stride,
+ GLuint _width,
+ GLuint _vert_stride)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ string (file, "g[a0");
+ if (_addr_subreg_nr)
+ format (file, ".%d", _addr_subreg_nr);
+ if (_addr_imm)
+ format (file, " %d", _addr_imm);
+ string (file, "]");
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_da16 (FILE *file,
+ GLuint _reg_type,
+ GLuint _reg_file,
+ GLuint _vert_stride,
+ GLuint _reg_nr,
+ GLuint _subreg_nr,
+ GLuint __abs,
+ GLuint _negate,
+ GLuint swz_x,
+ GLuint swz_y,
+ GLuint swz_z,
+ GLuint swz_w)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, _reg_nr);
+ if (err == -1)
+ return 0;
+ if (_subreg_nr)
+ format (file, ".%d", _subreg_nr);
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+ switch (type) {
+ case BRW_REGISTER_TYPE_UD:
+ format (file, "0x%08xUD", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ format (file, "%dD", inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UW:
+ format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_W:
+ format (file, "%dW", (int16_t) inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UB:
+ format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_VF:
+ format (file, "Vector Float");
+ break;
+ case BRW_REGISTER_TYPE_V:
+ format (file, "0x%08xV", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_F:
+ format (file, "%-gF", inst->bits3.f);
+ }
+ return 0;
+}
+
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src0_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src0_reg_type,
+ inst->bits1.da1.src0_reg_file,
+ inst->bits2.da1.src0_vert_stride,
+ inst->bits2.da1.src0_width,
+ inst->bits2.da1.src0_horiz_stride,
+ inst->bits2.da1.src0_reg_nr,
+ inst->bits2.da1.src0_subreg_nr,
+ inst->bits2.da1.src0_abs,
+ inst->bits2.da1.src0_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src0_reg_type,
+ inst->bits1.ia1.src0_reg_file,
+ inst->bits2.ia1.src0_indirect_offset,
+ inst->bits2.ia1.src0_subreg_nr,
+ inst->bits2.ia1.src0_negate,
+ inst->bits2.ia1.src0_abs,
+ inst->bits2.ia1.src0_address_mode,
+ inst->bits2.ia1.src0_horiz_stride,
+ inst->bits2.ia1.src0_width,
+ inst->bits2.ia1.src0_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src0_reg_type,
+ inst->bits1.da16.src0_reg_file,
+ inst->bits2.da16.src0_vert_stride,
+ inst->bits2.da16.src0_reg_nr,
+ inst->bits2.da16.src0_subreg_nr,
+ inst->bits2.da16.src0_abs,
+ inst->bits2.da16.src0_negate,
+ inst->bits2.da16.src0_swz_x,
+ inst->bits2.da16.src0_swz_y,
+ inst->bits2.da16.src0_swz_z,
+ inst->bits2.da16.src0_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src1_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src1_reg_type,
+ inst->bits1.da1.src1_reg_file,
+ inst->bits3.da1.src1_vert_stride,
+ inst->bits3.da1.src1_width,
+ inst->bits3.da1.src1_horiz_stride,
+ inst->bits3.da1.src1_reg_nr,
+ inst->bits3.da1.src1_subreg_nr,
+ inst->bits3.da1.src1_abs,
+ inst->bits3.da1.src1_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src1_reg_type,
+ inst->bits1.ia1.src1_reg_file,
+ inst->bits3.ia1.src1_indirect_offset,
+ inst->bits3.ia1.src1_subreg_nr,
+ inst->bits3.ia1.src1_negate,
+ inst->bits3.ia1.src1_abs,
+ inst->bits3.ia1.src1_address_mode,
+ inst->bits3.ia1.src1_horiz_stride,
+ inst->bits3.ia1.src1_width,
+ inst->bits3.ia1.src1_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src1_reg_type,
+ inst->bits1.da16.src1_reg_file,
+ inst->bits3.da16.src1_vert_stride,
+ inst->bits3.da16.src1_reg_nr,
+ inst->bits3.da16.src1_subreg_nr,
+ inst->bits3.da16.src1_abs,
+ inst->bits3.da16.src1_negate,
+ inst->bits3.da16.src1_swz_x,
+ inst->bits3.da16.src1_swz_y,
+ inst->bits3.da16.src1_swz_z,
+ inst->bits3.da16.src1_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0;
+
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) {
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER:
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 5152c3f3a5f..682094ff139 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -141,6 +141,8 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.verts_per_instance = trim(prim->mode, prim->count);
prim_packet.start_vert_location = prim->start;
+ if (prim->indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = 0;
@@ -422,54 +424,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
return retval;
}
-static GLboolean brw_need_rebase( GLcontext *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_index_buffer *ib,
- GLuint min_index )
-{
- if (min_index == 0)
- return GL_FALSE;
-
- if (ib) {
- if (!vbo_all_varyings_in_vbos(arrays))
- return GL_TRUE;
- else
- return GL_FALSE;
- }
- else {
- /* Hmm. This isn't quite what I wanted. BRW can actually
- * handle the mixed case well enough that we shouldn't need to
- * rebase. However, it's probably not very common, nor hugely
- * expensive to do it this way:
- */
- if (!vbo_all_varyings_in_vbos(arrays))
- return GL_TRUE;
- else
- return GL_FALSE;
- }
-}
-
-
void brw_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index )
{
GLboolean retval;
- /* Decide if we want to rebase. If so we end up recursing once
- * only into this function.
- */
- if (brw_need_rebase( ctx, arrays, ib, min_index )) {
- vbo_rebase_prims( ctx, arrays,
- prim, nr_prims,
- ib, min_index, max_index,
- brw_draw_prims );
-
- return;
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+ /* Decide if we want to rebase. If so we end up recursing once
+ * only into this function.
+ */
+ if (min_index != 0) {
+ vbo_rebase_prims(ctx, arrays,
+ prim, nr_prims,
+ ib, min_index, max_index,
+ brw_draw_prims );
+ return;
+ }
}
/* Make a first attempt at drawing:
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index 9aebbdb1b86..2a14db217fc 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index );
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index c29f1dd5c03..d49fb0fd951 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -350,9 +350,6 @@ static void brw_prepare_vertices(struct brw_context *brw)
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
- GLuint nr_enabled = 0;
-
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
@@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
/* Accumulate the list of enabled arrays. */
+ brw->vb.nr_enabled = 0;
while (vs_inputs) {
GLuint i = _mesa_ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
vs_inputs &= ~(1 << i);
- enabled[nr_enabled++] = input;
+ brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
/* XXX: In the rare cases where this happens we fallback all
@@ -376,16 +374,15 @@ static void brw_prepare_vertices(struct brw_context *brw)
* cases with > 17 vertex attributes enabled, so it probably
* isn't an issue at this point.
*/
- if (nr_enabled >= BRW_VEP_MAX) {
+ if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
intel->Fallback = 1;
return;
}
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
if (input->glarray->BufferObj->Name != 0) {
struct intel_buffer_object *intel_buffer =
@@ -398,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw)
dri_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
+ input->count = input->glarray->_MaxElement;
+
+ /* This is a common place to reach if the user mistakenly supplies
+ * a pointer in place of a VBO offset. If we just let it go through,
+ * we may end up dereferencing a pointer beyond the bounds of the
+ * GTT. We would hope that the VBO's max_index would save us, but
+ * Mesa appears to hand us min/max values not clipped to the
+ * array object's _MaxElement, and _MaxElement frequently appears
+ * to be wrong anyway.
+ *
+ * The VBO spec allows application termination in this case, and it's
+ * probably a service to the poor programmer to do so rather than
+ * trying to just not render.
+ */
+ assert(input->offset < input->bo->size);
} else {
+ input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
if (input->bo != NULL) {
/* Already-uploaded vertex data is present from a previous
* prepare_vertices, but we had to re-validate state due to
@@ -466,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw)
brw_prepare_query_begin(brw);
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
brw_add_validated_bo(brw, input->bo);
}
@@ -477,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
- struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint i;
- GLuint nr_enabled = 0;
- /* Accumulate the list of enabled arrays. */
- while (vs_inputs) {
- i = _mesa_ffsll(vs_inputs) - 1;
- struct brw_vertex_element *input = &brw->vb.inputs[i];
+ brw_emit_query_begin(brw);
- vs_inputs &= ~(1 << i);
- enabled[nr_enabled++] = input;
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_enabled == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return;
}
- brw_emit_query_begin(brw);
-
/* Now emit VB and VEP state packets.
*
* This still defines a hardware VB for each input, even if they
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
- BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS);
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
- ((1 + nr_enabled * 4) - 2));
+ ((1 + brw->vb.nr_enabled * 4) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
@@ -524,15 +547,15 @@ static void brw_emit_vertices(struct brw_context *brw)
input->offset + input->element_size);
}
} else
- OUT_BATCH(brw->vb.max_index);
+ OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
- BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2));
- for (i = 0; i < nr_enabled; i++) {
- struct brw_vertex_element *input = enabled[i];
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
input->glarray->Format,
@@ -589,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw)
dri_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
+ GLuint ib_type_size;
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count;
+ ib_type_size = get_size(index_buffer->type);
+ ib_size = ib_type_size * index_buffer->count;
bufferobj = index_buffer->obj;;
/* Turn into a proper VBO:
*/
if (!bufferobj->Name) {
-
+ brw->ib.start_vertex_offset = 0;
+
/* Get new bufferobj, offset:
*/
get_space(brw, ib_size, &bo, &offset);
@@ -615,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw)
}
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
+ brw->ib.start_vertex_offset = 0;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
@@ -635,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw)
bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
INTEL_READ);
dri_bo_reference(bo);
+
+ /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+ * the index buffer state when we're just moving the start index
+ * of our drawing.
+ */
+ brw->ib.start_vertex_offset = offset / ib_type_size;
+ offset = 0;
+ ib_size = bo->size;
}
}
- dri_bo_unreference(brw->ib.bo);
- brw->ib.bo = bo;
- brw->ib.offset = offset;
+ if (brw->ib.bo != bo ||
+ brw->ib.offset != offset ||
+ brw->ib.size != ib_size)
+ {
+ drm_intel_bo_unreference(brw->ib.bo);
+ brw->ib.bo = bo;
+ brw->ib.offset = offset;
+ brw->ib.size = ib_size;
+
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ } else {
+ drm_intel_bo_unreference(bo);
+ }
brw_add_validated_bo(brw, brw->ib.bo);
}
-static void brw_emit_indices(struct brw_context *brw)
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_INDICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+static void brw_emit_index_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
- GLuint ib_size;
if (index_buffer == NULL)
return;
- ib_size = get_size(index_buffer->type) * index_buffer->count - 1;
-
/* Emit the indexbuffer packet:
*/
{
struct brw_indexbuffer ib;
memset(&ib, 0, sizeof(ib));
-
+
ib.header.bits.opcode = CMD_INDEX_BUFFER;
ib.header.bits.length = sizeof(ib)/4 - 2;
ib.header.bits.index_format = get_index_type(index_buffer->type);
ib.header.bits.cut_index_enable = 0;
-
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
OUT_BATCH( ib.header.dword );
@@ -676,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw)
brw->ib.offset);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
- brw->ib.offset + ib_size);
+ brw->ib.offset + brw->ib.size);
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
}
-const struct brw_tracked_state brw_indices = {
+const struct brw_tracked_state brw_index_buffer = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH | BRW_NEW_INDICES,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
.cache = 0,
},
- .prepare = brw_prepare_indices,
- .emit = brw_emit_indices,
+ .emit = brw_emit_index_buffer,
};
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index c53efba5991..1df561386e6 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
- p->current->header.destreg__conditonalmod = conditional;
+ p->current->header.destreg__conditionalmod = conditional;
}
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 2412014248c..241cdc33f86 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -481,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
/* Reset this one-shot flag:
*/
- if (p->current->header.destreg__conditonalmod) {
- p->current->header.destreg__conditonalmod = 0;
+ if (p->current->header.destreg__conditionalmod) {
+ p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
@@ -679,7 +679,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
assert(if_insn->header.opcode == BRW_OPCODE_IF);
if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
- if_insn->bits3.if_else.pop_count = 1;
+ if_insn->bits3.if_else.pop_count = 0;
if_insn->bits3.if_else.pad0 = 0;
}
@@ -871,7 +871,7 @@ void brw_CMP(struct brw_compile *p,
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
- insn->header.destreg__conditonalmod = conditional;
+ insn->header.destreg__conditionalmod = conditional;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -915,7 +915,7 @@ void brw_math( struct brw_compile *p,
* instructions.
*/
insn->header.predicate_control = 0;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -952,7 +952,7 @@ void brw_math_16( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
insn = next_insn(p, BRW_OPCODE_SEND);
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -969,7 +969,7 @@ void brw_math_16( struct brw_compile *p,
*/
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
- insn->header.destreg__conditonalmod = msg_reg_nr+1;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
brw_set_dest(insn, offset(dest,1));
brw_set_src0(insn, src);
@@ -1016,7 +1016,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -1062,7 +1062,7 @@ void brw_dp_READ_16( struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest); /* UW? */
brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
@@ -1116,7 +1116,7 @@ void brw_dp_READ_4( struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/* cast dest to a uword[8] vector */
@@ -1190,7 +1190,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
/*insn->header.access_mode = BRW_ALIGN_16;*/
@@ -1224,7 +1224,7 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1322,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p,
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
@@ -1375,7 +1375,7 @@ void brw_urb_WRITE(struct brw_compile *p,
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_urb_message(p->brw,
insn,
@@ -1410,7 +1410,7 @@ void brw_ff_sync(struct brw_compile *p,
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
- insn->header.destreg__conditonalmod = msg_reg_nr;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_ff_sync_message(p->brw,
insn,
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 980eac7646d..a9b2aa2eace 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -101,7 +101,7 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
BRW_URB_SWIZZLE_NONE);
}
-void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
struct brw_compile *p = &c->func;
brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index b5f6371c82c..bc0f0760738 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -113,7 +113,7 @@ struct brw_sf_unit_key {
unsigned int nr_urb_entries, urb_size, sfsize;
- GLenum front_face, cull_face;
+ GLenum front_face, cull_face, provoking_vertex;
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
@@ -153,6 +153,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
key->point_attenuated = ctx->Point._Attenuated;
+ /* _NEW_LIGHT */
+ key->provoking_vertex = ctx->Light.ProvokingVertex;
+
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
}
@@ -284,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
*/
- sf.sf7.trifan_pv = 2;
- sf.sf7.linestrip_pv = 1;
- sf.sf7.tristrip_pv = 2;
+ if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ } else {
+ sf.sf7.trifan_pv = 1;
+ sf.sf7.linestrip_pv = 0;
+ sf.sf7.tristrip_pv = 0;
+ }
sf.sf7.line_last_pixel_enable = 0;
/* Set bias for OpenGL rasterization rules:
@@ -300,6 +309,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
&sf, sizeof(sf),
NULL, NULL);
+ /* STATE_PREFETCH command description describes this state as being
+ * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+ */
/* Emit SF program relocation */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -340,6 +352,7 @@ static void upload_sf_unit( struct brw_context *brw )
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
+ _NEW_LIGHT |
_NEW_LINE |
_NEW_POINT |
_NEW_SCISSOR |
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index bf9f6cae55e..78572356a3d 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -92,6 +92,7 @@ const struct brw_tracked_state brw_clear_batch_cache;
const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
+const struct brw_tracked_state brw_index_buffer;
/**
* Use same key for WM and VS surfaces.
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 38d9dd8991e..95d42d2dcc5 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -94,6 +94,7 @@ const struct brw_tracked_state *atoms[] =
&brw_drawing_rect,
&brw_indices,
+ &brw_index_buffer,
&brw_vertices,
&brw_constant_buffer
@@ -208,6 +209,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_FENCE),
DEFINE_BIT(BRW_NEW_INDICES),
+ DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8ba7eb27b36..a6de09207b7 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1200,7 +1200,7 @@ struct brw_instruction
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
- GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
GLuint pad0:2;
GLuint debug_control:1;
GLuint saturate:1;
@@ -1228,7 +1228,9 @@ struct brw_instruction
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
- GLuint pad:6;
+ GLuint src1_reg_file:2; /* 0x00000c00 */
+ GLuint src1_reg_type:3; /* 0x00007000 */
+ GLuint pad:1;
GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
GLuint dest_horiz_stride:2;
@@ -1243,7 +1245,7 @@ struct brw_instruction
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
- GLuint pad0:1;
+ GLuint pad:1;
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
@@ -1348,7 +1350,7 @@ struct brw_instruction
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad:1;
+ GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
@@ -1363,7 +1365,7 @@ struct brw_instruction
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad0:1;
+ GLuint src1_address_mode:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
@@ -1377,7 +1379,7 @@ struct brw_instruction
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
- GLuint pad0:1;
+ GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
@@ -1565,6 +1567,7 @@ struct brw_instruction
GLint d;
GLuint ud;
+ float f;
} bits3;
};
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 7f9b2535340..1d2e953eb1e 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -65,11 +65,6 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
if (mt->compressed) {
mt->pitch = ALIGN(mt->width0, align_w);
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
- mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
- } else {
- qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
- mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
}
if (mt->first_level != mt->last_level) {
@@ -90,6 +85,14 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+ if (mt->compressed) {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+ } else {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+ }
+
for (level = mt->first_level; level <= mt->last_level; level++) {
GLuint img_height;
GLuint nr_images = 6;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 514f15d5e3a..83167b9258e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c )
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
+ int attributes_in_vue;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -128,6 +129,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
}
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
/* Allocate outputs. The non-position outputs go straight into message regs.
*/
@@ -220,11 +226,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* vertex urb, so is half the amount:
*/
c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+ /* Setting this field to 0 leads to undefined behavior according to the
+ * the VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
if (BRW_IS_IGDNG(c->func.brw))
- c->prog_data.urb_entry_size = (c->nr_outputs + 6 + 3) / 4;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
- c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
c->prog_data.total_grf = reg;
@@ -1245,9 +1262,30 @@ post_vs_emit( struct brw_vs_compile *c,
/* patch up the END code to jump past subroutines, etc */
offset = last_inst - end_inst;
- brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ if (offset > 1) {
+ brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ } else {
+ end_inst->header.opcode = BRW_OPCODE_NOP;
+ }
}
+static uint32_t
+get_predicate(uint32_t swizzle)
+{
+ switch (swizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ return BRW_PREDICATE_NORMAL;
+ }
+}
/* Emit the vertex program instructions here.
*/
@@ -1266,7 +1304,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
GLuint file;
if (INTEL_DEBUG & DEBUG_VS) {
- _mesa_printf("vs-emit:\n");
+ _mesa_printf("vs-mesa:\n");
_mesa_print_program(&c->vp->program.Base);
_mesa_printf("\n");
}
@@ -1453,7 +1491,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+ if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ if_inst[if_depth]->header.predicate_control =
+ get_predicate(inst->DstReg.CondSwizzle);
+ if_depth++;
break;
case OPCODE_ELSE:
if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
@@ -1541,6 +1582,19 @@ void brw_vs_emit(struct brw_vs_compile *c )
"unknown");
}
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+
if ((inst->DstReg.File == PROGRAM_OUTPUT)
&& (inst->DstReg.Index != VERT_RESULT_HPOS)
&& c->output_regs[inst->DstReg.Index].used_in_src) {
@@ -1578,4 +1632,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_vertex_write(c);
post_vs_emit(c, end_inst, last_inst);
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ int i;
+
+ _mesa_printf("vs-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 14e05be4f6c..2292de94c44 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -41,13 +41,13 @@ GLuint brw_wm_nr_args( GLuint opcode )
{
switch (opcode) {
case WM_FRONTFACING:
- return 0;
case WM_PIXELXY:
+ return 0;
case WM_CINTERP:
case WM_WPOSXY:
+ case WM_DELTAXY:
return 1;
case WM_LINTERP:
- case WM_DELTAXY:
case WM_PIXELW:
return 2;
case WM_FB_WRITE:
@@ -171,9 +171,11 @@ static void do_wm_prog( struct brw_context *brw,
* differently from "simple" shaders.
*/
if (fp->isGLSL) {
+ c->dispatch_width = 8;
brw_wm_glsl_emit(brw, c);
}
else {
+ c->dispatch_width = 16;
brw_wm_non_glsl_emit(brw, c);
}
@@ -202,6 +204,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
+ GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -263,6 +266,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
brw_wm_lookup_iz(line_aa,
lookup,
+ uses_depth,
key);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index ba497432c60..ae98b5492db 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -260,6 +260,7 @@ struct brw_wm_compile {
GLuint tmp_index;
GLuint tmp_max;
GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+ GLuint dispatch_width;
/** we may need up to 3 constants per instruction (if use_const_buffer) */
struct {
@@ -292,6 +293,7 @@ void brw_wm_print_program( struct brw_wm_compile *c,
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key );
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9f82916c025..b3cf524c63e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -65,8 +65,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
static void emit_pixel_xy(struct brw_compile *p,
const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+ GLuint mask)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -98,8 +97,7 @@ static void emit_pixel_xy(struct brw_compile *p,
static void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1)
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -545,16 +543,18 @@ static void emit_dp3( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MAC(p, dst[0], arg0[2], arg1[2]);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
brw_set_saturate(p, 0);
}
@@ -565,17 +565,19 @@ static void emit_dp4( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MAC(p, dst[0], arg0[3], arg1[3]);
+ brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
brw_set_saturate(p, 0);
}
@@ -632,18 +634,19 @@ static void emit_math1( struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0 )
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
- // function == BRW_MATH_FUNCTION_SINCOS);
-
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
brw_MOV(p, brw_message_reg(2), arg0[0]);
/* Send two messages to perform all 16 operations:
*/
brw_math_16(p,
- dst[0],
+ dst[dst_chan],
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
@@ -659,10 +662,12 @@ static void emit_math2( struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1)
{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
- assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
brw_push_insn_state(p);
@@ -681,7 +686,7 @@ static void emit_math2( struct brw_compile *p,
*/
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
- dst[0],
+ dst[dst_chan],
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
@@ -691,7 +696,7 @@ static void emit_math2( struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p,
- offset(dst[0],1),
+ offset(dst[dst_chan],1),
function,
(mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
4,
@@ -1194,11 +1199,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags, args[0]);
+ emit_pixel_xy(p, dst, dst_flags);
break;
case WM_DELTAXY:
- emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
+ emit_delta_xy(p, dst, dst_flags, args[0]);
break;
case WM_WPOSXY:
@@ -1385,4 +1390,13 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->hw_reg,
inst->dst[i]->spill_slot);
}
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ int i;
+
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index b9e8dd2e96e..5dc076a8257 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -226,9 +226,42 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
0, 0, 0, /* tex unit, target, shadow */
src0, src1, src2);
}
-
+/* Many Mesa opcodes produce the same value across all the result channels.
+ * We'd rather not have to support that splatting in the opcode implementations,
+ * and brw_wm_pass*.c wants to optimize them out by shuffling references around
+ * anyway. We can easily get both by emitting the opcode to one channel, and
+ * then MOVing it to the others, which brw_wm_pass*.c already understands.
+ */
+static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst0)
+{
+ struct prog_instruction *inst;
+ unsigned int dst_chan;
+ unsigned int other_channel_mask;
+
+ if (inst0->DstReg.WriteMask == 0)
+ return NULL;
+
+ dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
+ inst = get_fp_inst(c);
+ *inst = *inst0;
+ inst->DstReg.WriteMask = 1 << dst_chan;
+
+ other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
+ if (other_channel_mask != 0) {
+ inst = emit_op(c,
+ OPCODE_MOV,
+ dst_mask(inst0->DstReg, other_channel_mask),
+ 0,
+ src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
+ src_undef(),
+ src_undef());
+ }
+ return inst;
+}
+
/***********************************************************************
* Special instructions for interpolation and other tasks
@@ -376,14 +409,6 @@ static void emit_interp( struct brw_wm_compile *c,
}
break;
case FRAG_ATTRIB_FOGC:
- /* The FOGC input is really special. When a program uses glFogFragCoord,
- * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL,
- * the glFrontFacing and glPointCoord values are also stashed in FOGC.
- * So, write the interpolated fog value to X, then either 0, 1, or the
- * stashed values to Y, Z, W. Note that this means that
- * glFogFragCoord.yzw can be wrong in those cases!
- */
-
/* Interpolate the fog coordinate */
emit_op(c,
WM_PINTERP,
@@ -393,26 +418,40 @@ static void emit_interp( struct brw_wm_compile *c,
deltas,
get_pixel_w(c));
- /* Move the front facing value into FOGC.y if it's needed. */
- if (c->fp->program.UsesFrontFacing) {
- emit_op(c,
- WM_FRONTFACING,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_undef(),
- src_undef(),
- src_undef());
- } else {
- emit_op(c,
- OPCODE_MOV,
- dst_mask(dst, WRITEMASK_Y),
- 0,
- src_swizzle1(interp, SWIZZLE_ZERO),
- src_undef(),
- src_undef());
- }
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_YZW),
+ 0,
+ src_swizzle(interp,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_FACE:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_FRONTFACING,
+ dst_mask(dst, WRITEMASK_X),
+ 0,
+ src_undef(),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_PNTC:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_PINTERP,
+ dst_mask(dst, WRITEMASK_XY),
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
- /* Should do the PointCoord thing here. */
emit_op(c,
OPCODE_MOV,
dst_mask(dst, WRITEMASK_ZW),
@@ -425,6 +464,7 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef(),
src_undef());
break;
+
default:
emit_op(c,
WM_PINTERP,
@@ -683,7 +723,7 @@ static void precalc_tex( struct brw_wm_compile *c,
/* tmp0 = 1 / tmp1 */
emit_op(c, OPCODE_RCP,
- tmp0,
+ dst_mask(tmp0, WRITEMASK_X),
0,
tmp1src,
src_undef(),
@@ -694,7 +734,7 @@ static void precalc_tex( struct brw_wm_compile *c,
tmpcoord,
0,
src0,
- tmp0src,
+ src_swizzle1(tmp0src, SWIZZLE_X),
src_undef());
release_temp(c, tmp0);
@@ -717,7 +757,11 @@ static void precalc_tex( struct brw_wm_compile *c,
tmpcoord,
0,
inst->SrcReg[0],
- scale,
+ src_swizzle(scale,
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_ONE,
+ SWIZZLE_ONE),
src_undef());
coord = src_reg_from_dst(tmpcoord);
@@ -1134,9 +1178,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
break;
case OPCODE_PRINT:
break;
-
default:
- emit_insn(c, inst);
+ if (brw_wm_is_scalar_result(inst->Opcode))
+ emit_scalar_insn(c, inst);
+ else
+ emit_insn(c, inst);
break;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 19f777fe32e..a5b18ec7d76 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -10,6 +10,9 @@ enum _subroutine {
SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
};
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint component);
/**
* Determine if the given fragment program uses GLSL features such
@@ -22,6 +25,7 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_IF:
case OPCODE_ENDIF:
case OPCODE_CAL:
@@ -130,19 +134,6 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
c->wm_regs[file][index][component].inited = GL_TRUE;
}
-/**
- * Examine instruction's write mask to find index of first component
- * enabled for writing.
- */
-static int get_scalar_dst_index(const struct prog_instruction *inst)
-{
- int i;
- for (i = 0; i < 4; i++)
- if (inst->DstReg.WriteMask & (1<<i))
- break;
- return i;
-}
-
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
@@ -402,6 +393,27 @@ static void prealloc_reg(struct brw_wm_compile *c)
prealloc_grf(c, 126);
prealloc_grf(c, 127);
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!
@@ -639,23 +651,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-static void emit_abs( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i);
- src = get_src_reg(c, inst, 0, i);
- brw_MOV(p, dst, brw_abs(src));
- }
- }
- brw_set_saturate(p, 0);
-}
-
static void emit_trunc( struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
@@ -1031,12 +1026,20 @@ static void emit_dp3(struct brw_wm_compile *c,
struct brw_reg src0[3], src1[3], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 3; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -1050,11 +1053,19 @@ static void emit_dp4(struct brw_wm_compile *c,
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
@@ -1069,11 +1080,19 @@ static void emit_dph(struct brw_wm_compile *c,
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
src0[i] = get_src_reg(c, inst, 0, i);
src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, dst, src0[2], src1[2]);
@@ -1091,37 +1110,28 @@ static void emit_math1(struct brw_wm_compile *c,
const struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
- struct brw_reg src0, dst, tmp;
- const int mark = mark_tmps( c );
- int i;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
- tmp = alloc_tmp(c);
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
/* Get first component of source register */
+ dst = get_dst_reg(c, inst, dst_chan);
src0 = get_src_reg(c, inst, 0, 0);
- /* tmp = func(src0) */
brw_MOV(p, brw_message_reg(2), src0);
brw_math(p,
- tmp,
+ dst,
func,
(inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
2,
brw_null_reg(),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
-
- /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/
-
- /* replicate tmp value across enabled dest channels */
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1 << i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, tmp);
- }
- }
-
- release_tmps(c, mark);
}
static void emit_rcp(struct brw_wm_compile *c,
@@ -1192,24 +1202,6 @@ static void emit_arl(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static void emit_sub(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg_imm(c, inst, 1, i);
- brw_ADD(p, dst, src0, negate(src1));
- }
- }
- brw_set_saturate(p, 0);
-}
static void emit_mul(struct brw_wm_compile *c,
const struct prog_instruction *inst)
@@ -1321,7 +1313,15 @@ static void emit_pow(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg dst, src0, src1;
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ dst = get_dst_reg(c, inst, dst_chan);
src0 = get_src_reg_imm(c, inst, 0, 0);
src1 = get_src_reg_imm(c, inst, 1, 0);
@@ -2828,18 +2828,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case WM_FRONTFACING:
emit_frontfacing(c, inst);
break;
- case OPCODE_ABS:
- emit_abs(c, inst);
- break;
case OPCODE_ADD:
emit_add(c, inst);
break;
case OPCODE_ARL:
emit_arl(c, inst);
break;
- case OPCODE_SUB:
- emit_sub(c, inst);
- break;
case OPCODE_FRC:
emit_frc(c, inst);
break;
@@ -3007,7 +3001,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
loop_depth--;
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
if (inst0->header.opcode == BRW_OPCODE_BREAK) {
@@ -3032,8 +3026,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
-}
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
+}
/**
* Do GPU code generation for shaders that use GLSL features such as
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index 8fd067abe7d..5e399ac62a8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -122,6 +122,7 @@ const struct {
*/
void brw_wm_lookup_iz( GLuint line_aa,
GLuint lookup,
+ GLboolean ps_uses_depth,
struct brw_wm_prog_key *key )
{
GLuint reg = 2;
@@ -131,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
key->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present) {
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
key->source_depth_reg = reg;
reg += 2;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 92142764f5d..62792583396 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -257,34 +257,6 @@ static void pass0_set_dst( struct brw_wm_compile *c,
}
-static void pass0_set_dst_scalar( struct brw_wm_compile *c,
- struct brw_wm_instruction *out,
- const struct prog_instruction *inst,
- GLuint writemask )
-{
- if (writemask) {
- const struct prog_dst_register *dst = &inst->DstReg;
- GLuint i;
-
- /* Compute only the first (X) value:
- */
- out->writemask = WRITEMASK_X;
- out->dst[0] = get_value(c);
-
- /* Update our tracking register file for all the components in
- * writemask:
- */
- for (i = 0; i < 4; i++) {
- if (writemask & (1<<i)) {
- pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]);
- }
- }
- }
- else
- out->writemask = 0;
-}
-
-
static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
struct prog_src_register src,
GLuint i )
@@ -363,10 +335,7 @@ translate_insn(struct brw_wm_compile *c,
/* Dst:
*/
- if (brw_wm_is_scalar_result(out->opcode))
- pass0_set_dst_scalar(c, out, inst, writemask);
- else
- pass0_set_dst(c, out, inst, writemask);
+ pass0_set_dst(c, out, inst, writemask);
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 3fc18ff1f3a..dff466587a8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -103,6 +103,10 @@ struct wm_sampler_key {
GLenum minfilter, magfilter;
GLenum comparemode, comparefunc;
dri_bo *sdc_bo;
+
+ /** If target is cubemap, take context setting.
+ */
+ GLboolean seamless_cube_map;
} sampler[BRW_MAX_TEX_UNIT];
};
@@ -169,30 +173,33 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
}
}
- if (key->tex_target == GL_TEXTURE_CUBE_MAP &&
- (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
- /* If we're using anything but nearest sampling for a cube map, we
- * need to set this wrap mode to avoid GPU lock-ups.
- */
- sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
- }
- else if (key->tex_target == GL_TEXTURE_1D) {
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+ /* Cube-maps on 965 and later must use the same wrap mode for all 3
+ * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
+ */
+ if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
+ if (key->seamless_cube_map &&
+ (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ } else {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
+ } else if (key->tex_target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
* any nonexistent border pixels from floating in.
*/
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- else {
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
- sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
- }
+
/* Set shadow function:
*/
@@ -249,6 +256,9 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
entry->tex_target = texObj->Target;
+ entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
+ ? ctx->Texture.CubeMapSeamless : GL_FALSE;
+
entry->wrap_r = texObj->WrapR;
entry->wrap_s = texObj->WrapS;
entry->wrap_t = texObj->WrapT;
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 0f87fc46a44..6aa36d10b12 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -196,6 +196,16 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
struct intel_context *intel = batch->intel;
GLuint used = batch->ptr - batch->map;
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch->buf;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
+
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch->buf;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
+
if (used == 0) {
batch->cliprect_mode = IGNORE_CLIPRECTS;
return;
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 2e95bd1013f..979f2025842 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -477,6 +477,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
BR13 |= BR13_565;
}
+ assert(irb->region->tiling != I915_TILING_Y);
+
#ifndef I915
if (irb->region->tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
@@ -571,6 +573,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
assert( logic_op - GL_CLEAR >= 0 );
assert( logic_op - GL_CLEAR < 0x10 );
+ assert(dst_pitch > 0);
if (w < 0 || h < 0)
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 4abb525f78e..46f1a7f7205 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -38,6 +38,7 @@
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
#include "i830_dri.h"
@@ -513,7 +514,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
* each of N places that do rendering. This has worse performances,
* but it is much easier to get correct.
*/
- if (intel->is_front_buffer_rendering) {
+ if (!intel->is_front_buffer_rendering) {
intel->front_buffer_dirty = GL_FALSE;
}
}
@@ -529,7 +530,27 @@ intelFlush(GLcontext * ctx)
static void
intel_glFlush(GLcontext *ctx)
{
+ struct intel_context *intel = intel_context(ctx);
+
intel_flush(ctx, GL_TRUE);
+
+ /* We're using glFlush as an indicator that a frame is done, which is
+ * what DRI2 does before calling SwapBuffers (and means we should catch
+ * people doing front-buffer rendering, as well)..
+ *
+ * Wait for the swapbuffers before the one we just emitted, so we don't
+ * get too many swaps outstanding for apps that are GPU-heavy but not
+ * CPU-heavy.
+ *
+ * Unfortunately, we don't have a handle to the batch containing the swap,
+ * and getting our hands on that doesn't seem worth it, so we just us the
+ * first batch we emitted after the last swap.
+ */
+ if (intel->first_post_swapbuffers_batch != NULL) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ }
}
void
@@ -692,6 +713,8 @@ intelInitContext(struct intel_context *intel,
_swrast_allow_pixel_fog(ctx, GL_FALSE);
_swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _mesa_meta_init(ctx);
+
intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
intel->hw_stipple = 1;
@@ -752,7 +775,7 @@ intelInitContext(struct intel_context *intel,
if (intel->use_texture_tiling &&
!intel->intelScreen->kernel_exec_fencing) {
fprintf(stderr, "No kernel support for execution fencing, "
- "disabling texture tiling");
+ "disabling texture tiling\n");
intel->use_texture_tiling = GL_FALSE;
}
intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
@@ -795,6 +818,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
INTEL_FIREVERTICES(intel);
+ _mesa_meta_free(&intel->ctx);
+
meta_destroy_metaops(&intel->meta);
intel->vtbl.destroy(intel);
@@ -814,6 +839,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
intel->prim.vb = NULL;
dri_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
+ dri_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
if (release_texture_heaps) {
/* Nothing is currently done here to free texture heaps;
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 08bea88c958..0d9db5eb1da 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -80,9 +80,13 @@ extern void intelFallback(struct intel_context *intel, GLuint bit,
#define INTEL_MAX_FIXUP 64
+
+/**
+ * intel_context is derived from Mesa's context class: GLcontext.
+ */
struct intel_context
{
- GLcontext ctx; /* the parent class */
+ GLcontext ctx; /**< base class, must be first field */
struct
{
@@ -178,6 +182,7 @@ struct intel_context
GLboolean ttm;
struct intel_batchbuffer *batch;
+ drm_intel_bo *first_post_swapbuffers_batch;
GLboolean no_batch_wrap;
unsigned batch_id;
@@ -307,7 +312,7 @@ struct intel_context
__DRIdrawablePrivate *driReadDrawable;
__DRIscreenPrivate *driScreen;
intelScreenPrivate *intelScreen;
- volatile struct drm_i915_sarea *sarea;
+ volatile drm_i915_sarea_t *sarea;
GLuint lastStamp;
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 7742609d242..9f90ef0a697 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -48,6 +48,7 @@
#define need_GL_EXT_framebuffer_blit
#define need_GL_EXT_gpu_program_parameters
#define need_GL_EXT_point_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_APPLE_vertex_array_object
@@ -71,6 +72,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_half_float_pixel", NULL },
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
+ { "GL_ARB_point_sprite", NULL },
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_cube_map", NULL },
{ "GL_ARB_texture_env_add", NULL },
@@ -92,6 +94,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{ "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions },
{ "GL_EXT_packed_depth_stencil", NULL },
+ { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
@@ -119,8 +122,10 @@ static const struct dri_extension i915_extensions[] = {
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_shadow", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
+ { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions },
{ "GL_ATI_texture_env_combine3", NULL },
{ "GL_EXT_shadow_funcs", NULL },
+ { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_NV_texture_env_combine4", NULL },
{ NULL, NULL }
};
@@ -135,6 +140,7 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions},
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_seamless_cube_map", NULL },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
{ "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
{ "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 666893596e1..804c0348401 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -35,6 +35,7 @@
#include "main/context.h"
#include "main/texformat.h"
#include "main/texrender.h"
+#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_buffers.h"
@@ -700,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
/**
- * Called from glBlitFramebuffer().
- * For now, we're doing an approximation with glCopyPixels().
- * XXX we need to bypass all the per-fragment operations, except scissor.
- */
-static void
-intel_blit_framebuffer(GLcontext *ctx,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
-{
- const GLfloat xZoomSave = ctx->Pixel.ZoomX;
- const GLfloat yZoomSave = ctx->Pixel.ZoomY;
- GLsizei width, height;
- GLfloat xFlip = 1.0F, yFlip = 1.0F;
-
- if (srcX1 < srcX0) {
- GLint tmp = srcX1;
- srcX1 = srcX0;
- srcX0 = tmp;
- xFlip = -1.0F;
- }
-
- if (srcY1 < srcY0) {
- GLint tmp = srcY1;
- srcY1 = srcY0;
- srcY0 = tmp;
- yFlip = -1.0F;
- }
-
- width = srcX1 - srcX0;
- height = srcY1 - srcY0;
-
- ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0);
- ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0);
-
- if (ctx->Pixel.ZoomX < 0.0F) {
- dstX0 = MAX2(dstX0, dstX1);
- }
- else {
- dstX0 = MIN2(dstX0, dstX1);
- }
-
- if (ctx->Pixel.ZoomY < 0.0F) {
- dstY0 = MAX2(dstY0, dstY1);
- }
- else {
- dstY0 = MIN2(dstY0, dstY1);
- }
-
- if (mask & GL_COLOR_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_COLOR);
- }
- if (mask & GL_DEPTH_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_DEPTH);
- }
- if (mask & GL_STENCIL_BUFFER_BIT) {
- ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height,
- dstX0, dstY0, GL_STENCIL);
- }
-
- ctx->Pixel.ZoomX = xZoomSave;
- ctx->Pixel.ZoomY = yZoomSave;
-}
-
-
-/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
*/
@@ -782,5 +715,5 @@ intel_fbo_init(struct intel_context *intel)
intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
- intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
+ intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index 5d52335dee3..ca796b36559 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -31,7 +31,7 @@
#include "main/state.h"
#include "main/mtypes.h"
#include "main/macros.h"
-#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
#include "intel_screen.h"
#include "intel_context.h"
@@ -97,162 +97,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
ctx->Color.BlendEnabled);
}
-#ifdef I915
-/* Doesn't work for overlapping regions. Could do a double copy or
- * just fallback.
- */
-static GLboolean
-do_texture_copypixels(GLcontext * ctx,
- GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_region *dst = intel_drawbuf_region(intel);
- struct intel_region *src = copypix_src_region(intel, type);
- GLenum src_format;
- GLenum src_type;
-
- DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__,
- srcx, srcy, width, height, dstx, dsty);
-
- if (!src || !dst || type != GL_COLOR)
- return GL_FALSE;
-
- if (ctx->_ImageTransferState) {
- if (INTEL_DEBUG & DEBUG_PIXEL)
- fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
- return GL_FALSE;
- }
-
- /* Can't handle overlapping regions. Don't have sufficient control
- * over rasterization to pull it off in-place. Punt on these for
- * now.
- *
- * XXX: do a copy to a temporary.
- */
- if (src->buffer == dst->buffer) {
- drm_clip_rect_t srcbox;
- drm_clip_rect_t dstbox;
- drm_clip_rect_t tmp;
-
- srcbox.x1 = srcx;
- srcbox.y1 = srcy;
- srcbox.x2 = srcx + width;
- srcbox.y2 = srcy + height;
-
- if (ctx->Pixel.ZoomX > 0) {
- dstbox.x1 = dstx;
- dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
- } else {
- dstbox.x1 = dstx + width * ctx->Pixel.ZoomX;
- dstbox.x2 = dstx;
- }
- if (ctx->Pixel.ZoomY > 0) {
- dstbox.y1 = dsty;
- dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
- } else {
- dstbox.y1 = dsty + height * ctx->Pixel.ZoomY;
- dstbox.y2 = dsty;
- }
-
- DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
- DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
- width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
-
- if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
- DBG("%s: regions overlap\n", __FUNCTION__);
- return GL_FALSE;
- }
- }
-
- intelFlush(&intel->ctx);
-
- intel->vtbl.install_meta_state(intel);
-
- /* Is this true? Also will need to turn depth testing on according
- * to state:
- */
- intel->vtbl.meta_no_stencil_write(intel);
- intel->vtbl.meta_no_depth_write(intel);
-
- /* Set the 3d engine to draw into the destination region:
- */
- intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
-
- intel->vtbl.meta_import_pixel_state(intel);
-
- if (src->cpp == 2) {
- src_format = GL_RGB;
- src_type = GL_UNSIGNED_SHORT_5_6_5;
- }
- else {
- src_format = GL_BGRA;
- src_type = GL_UNSIGNED_BYTE;
- }
-
- /* Set the frontbuffer up as a large rectangular texture.
- */
- if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
- src->pitch,
- src->height, src_format, src_type)) {
- intel->vtbl.leave_meta_state(intel);
- return GL_FALSE;
- }
-
-
- intel->vtbl.meta_texture_blend_replace(intel);
-
- LOCK_HARDWARE(intel);
-
- if (intel->driDrawable->numClipRects) {
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-
- srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */
-
- srcx += dPriv->x;
- srcy += dPriv->y;
-
- /* Clip against the source region. This is the only source
- * clipping we do. XXX: Just set the texcord wrap mode to clamp
- * or similar.
- *
- */
- if (0) {
- GLint orig_x = srcx;
- GLint orig_y = srcy;
-
- if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
- &srcx, &srcy, &width, &height))
- goto out;
-
- dstx += srcx - orig_x;
- dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
- }
-
- /* Just use the regular cliprect mechanism... Does this need to
- * even hold the lock???
- */
- intel->vtbl.meta_draw_quad(intel,
- dstx,
- dstx + width * ctx->Pixel.ZoomX,
- dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
- dPriv->h - (dsty), 0, /* XXX: what z value? */
- 0x00ff00ff,
- srcx, srcx + width, srcy, srcy + height);
-
- out:
- intel->vtbl.leave_meta_state(intel);
- intel_batchbuffer_emit_mi_flush(intel->batch);
- }
- UNLOCK_HARDWARE(intel);
-
- DBG("%s: success\n", __FUNCTION__);
- return GL_TRUE;
-}
-#endif /* I915 */
-
/**
* CopyPixels with the blitter. Don't support zooming, pixel transfer, etc.
@@ -400,12 +244,5 @@ intelCopyPixels(GLcontext * ctx,
if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
-#ifdef I915
- if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
- return;
-#endif
-
- DBG("fallback to _swrast_CopyPixels\n");
-
- _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+ _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 7525cd9c4d7..497f7967649 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -181,6 +181,11 @@ intel_region_alloc(struct intel_context *intel,
dri_bo *buffer;
struct intel_region *region;
+ if (tiling == I915_TILING_X)
+ height = ALIGN(height, 8);
+ else if (tiling == I915_TILING_Y)
+ height = ALIGN(height, 32);
+
if (expect_accelerated_upload) {
buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
pitch * cpp * height, 64);
@@ -452,6 +457,7 @@ void
intel_region_cow(struct intel_context *intel, struct intel_region *region)
{
struct intel_buffer_object *pbo = region->pbo;
+ GLboolean ok;
intel_region_release_pbo(intel, region);
@@ -463,13 +469,14 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
*/
LOCK_HARDWARE(intel);
- assert(intelEmitCopyBlit(intel,
- region->cpp,
- region->pitch, pbo->buffer, 0, region->tiling,
- region->pitch, region->buffer, 0, region->tiling,
- 0, 0, 0, 0,
- region->pitch, region->height,
- GL_COPY));
+ ok = intelEmitCopyBlit(intel,
+ region->cpp,
+ region->pitch, pbo->buffer, 0, region->tiling,
+ region->pitch, region->buffer, 0, region->tiling,
+ 0, 0, 0, 0,
+ region->pitch, region->height,
+ GL_COPY);
+ assert(ok);
UNLOCK_HARDWARE(intel);
}
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 6bbc995c1e6..1b8c56e68d6 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -69,7 +69,11 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_DESC_END
DRI_CONF_OPT_END
- DRI_CONF_TEXTURE_TILING(false)
+#ifdef I915
+ DRI_CONF_TEXTURE_TILING(false)
+#else
+ DRI_CONF_TEXTURE_TILING(true)
+#endif
DRI_CONF_OPT_BEGIN(early_z, bool, false)
DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
@@ -628,10 +632,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
/* Otherwise, use the classic buffer manager. */
if (intelScreen->bufmgr == NULL) {
if (gem_disable) {
- fprintf(stderr, "GEM disabled. Using classic.\n");
+ _mesa_warning(NULL, "GEM disabled. Using classic.");
} else {
- fprintf(stderr, "Failed to initialize GEM. "
- "Falling back to classic.\n");
+ _mesa_warning(NULL,
+ "Failed to initialize GEM. Falling back to classic.");
}
if (intelScreen->tex.size == 0) {
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 34b78ebc1ab..8df49908806 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -501,7 +501,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
/**
- * Prepare for softare rendering. Map current read/draw framebuffers'
+ * Prepare for software rendering. Map current read/draw framebuffers'
* renderbuffes and all currently bound texture objects.
*
* Old note: Moved locking out to get reasonable span performance.
@@ -526,7 +526,7 @@ intelSpanRenderStart(GLcontext * ctx)
}
/**
- * Called when done softare rendering. Unmap the buffers we mapped in
+ * Called when done software rendering. Unmap the buffers we mapped in
* the above function.
*/
void
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 1f27131dac0..89037073f84 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx,
GLenum target, GLint level,
GLint xoffset, GLint yoffset, GLint zoffset,
GLint width, GLint height, GLint depth,
+ GLsizei imageSize,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing,
struct gl_texture_object *texObj,
- struct gl_texture_image *texImage)
+ struct gl_texture_image *texImage,
+ GLboolean compressed)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intelImage = intel_texture_image(texImage);
@@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx,
intelFlush(ctx);
- pixels =
- _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
- type, pixels, packing, "glTexSubImage2D");
+ if (compressed)
+ pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize,
+ pixels, packing,
+ "glCompressedTexImage");
+ else
+ pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+ format, type, pixels, packing,
+ "glTexSubImage");
if (!pixels)
return;
@@ -90,15 +97,28 @@ intelTexSubimage(GLcontext * ctx,
assert(dstRowStride);
- if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
- texImage->TexFormat,
- texImage->Data,
- xoffset, yoffset, zoffset,
- dstRowStride,
- texImage->ImageOffsets,
- width, height, depth,
- format, type, pixels, packing)) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+ if (compressed) {
+ if (intelImage->mt) {
+ struct intel_region *dst = intelImage->mt->region;
+
+ _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+ xoffset, yoffset / 4,
+ (width + 3) & ~3, (height + 3) / 4,
+ pixels, (width + 3) & ~3, 0, 0);
+ } else
+ memcpy(texImage->Data, pixels, imageSize);
+ }
+ else {
+ if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+ texImage->TexFormat,
+ texImage->Data,
+ xoffset, yoffset, zoffset,
+ dstRowStride,
+ texImage->ImageOffsets,
+ width, height, depth,
+ format, type, pixels, packing)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+ }
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -132,8 +152,8 @@ intelTexSubImage3D(GLcontext * ctx,
intelTexSubimage(ctx, 3,
target, level,
xoffset, yoffset, zoffset,
- width, height, depth,
- format, type, pixels, packing, texObj, texImage);
+ width, height, depth, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
@@ -152,8 +172,8 @@ intelTexSubImage2D(GLcontext * ctx,
intelTexSubimage(ctx, 2,
target, level,
xoffset, yoffset, 0,
- width, height, 1,
- format, type, pixels, packing, texObj, texImage);
+ width, height, 1, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
@@ -172,8 +192,8 @@ intelTexSubImage1D(GLcontext * ctx,
intelTexSubimage(ctx, 1,
target, level,
xoffset, 0, 0,
- width, 1, 1,
- format, type, pixels, packing, texObj, texImage);
+ width, 1, 1, 0,
+ format, type, pixels, packing, texObj, texImage, GL_FALSE);
}
static void
@@ -187,8 +207,11 @@ intelCompressedTexSubImage2D(GLcontext * ctx,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
- fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n",
- width, height, xoffset, yoffset);
+ intelTexSubimage(ctx, 2,
+ target, level,
+ xoffset, yoffset, 0,
+ width, height, 1, imageSize,
+ format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE);
}
diff --git a/src/mesa/drivers/dri/r200/.gitignore b/src/mesa/drivers/dri/r200/.gitignore
deleted file mode 100644
index 2f9cd1a987b..00000000000
--- a/src/mesa/drivers/dri/r200/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-radeon_bocs_wrapper.h
-radeon_bo_legacy.[ch]
-radeon_chipset.h
-radeon_cmdbuf.h
-radeon_common.[ch]
-radeon_common_context.[ch]
-radeon_cs_legacy.[ch]
-radeon_dma.[ch]
-radeon_fbo.c
-radeon_lock.[ch]
-radeon_mipmap_tree.[ch]
-radeon_screen.[ch]
-radeon_span.[ch]
-radeon_texture.[ch]
-server
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 4686241957b..e81a1b38ac3 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -55,41 +55,6 @@ X86_SOURCES =
DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200
-SYMLINKS = \
- server/radeon_egl.c \
- server/radeon_dri.c \
- server/radeon_dri.h \
- server/radeon.h \
- server/radeon_macros.h \
- server/radeon_reg.h
-
-COMMON_SYMLINKS = \
- radeon_chipset.h \
- radeon_screen.c \
- radeon_screen.h \
- radeon_bo_legacy.c \
- radeon_cs_legacy.c \
- radeon_bo_legacy.h \
- radeon_cs_legacy.h \
- radeon_bocs_wrapper.h \
- radeon_span.h \
- radeon_span.c \
- radeon_lock.c \
- radeon_lock.h \
- radeon_common.c \
- radeon_common_context.c \
- radeon_common_context.h \
- radeon_common.h \
- radeon_cmdbuf.h \
- radeon_mipmap_tree.c \
- radeon_mipmap_tree.h \
- radeon_texture.c \
- radeon_texture.h \
- radeon_dma.c \
- radeon_dma.h \
- radeon_fbo.c \
- $(CS_SOURCES)
-
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
@@ -99,15 +64,4 @@ include ../Makefile.template
#INCLUDES += -I../radeon/server
-server:
- mkdir -p server
-
-$(SYMLINKS): server
- @[ -e $@ ] || ln -sf ../../radeon/$@ server/
-
-
-$(COMMON_SYMLINKS):
- @[ -e $@ ] || ln -sf ../radeon/$@ ./
-
-symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)
-
+symlinks:
diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
index d49f4fabe72..14d6bc19c98 100644
--- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
@@ -107,31 +107,37 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
void r200EmitScissor(r200ContextPtr rmesa)
{
+ unsigned x1, y1, x2, y2;
+ struct radeon_renderbuffer *rrb;
BATCH_LOCALS(&rmesa->radeon);
if (!rmesa->radeon.radeonScreen->kernel_mm) {
return;
}
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ if (!rrb || !rrb->bo)
+ return;
+
if (rmesa->radeon.state.scissor.enabled) {
- BEGIN_BATCH(8);
- OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0));
- OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]);
- OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0));
- OUT_BATCH(R200_SCISSOR_ENABLE_0);
- OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_TL_0, 0));
- OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
- rmesa->radeon.state.scissor.rect.x1);
- OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_BR_0, 0));
- OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) |
- (rmesa->radeon.state.scissor.rect.x2 - 1));
- END_BATCH();
+ x1 = rmesa->radeon.state.scissor.rect.x1;
+ y1 = rmesa->radeon.state.scissor.rect.y1;
+ x2 = rmesa->radeon.state.scissor.rect.x2 - 1;
+ y2 = rmesa->radeon.state.scissor.rect.y2 - 1;
} else {
- BEGIN_BATCH(4);
- OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0));
- OUT_BATCH(rmesa->hw.set.cmd[SET_RE_CNTL] & ~R200_SCISSOR_ENABLE);
- OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0));
- OUT_BATCH(0);
- END_BATCH();
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width - 1;
+ y2 = rrb->base.Height - 1;
}
+ BEGIN_BATCH(8);
+ OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0));
+ OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]);
+ OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0));
+ OUT_BATCH(0);
+ OUT_BATCH(CP_PACKET0(R200_RE_TOP_LEFT, 0));
+ OUT_BATCH((y1 << 16) | x1);
+ OUT_BATCH(CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0));
+ OUT_BATCH((y2 << 16) | x2);
+ END_BATCH();
}
/* Fire a section of the retained (indexed_verts) buffer as a regular
@@ -234,15 +240,9 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
radeonEmitState(&rmesa->radeon);
-#ifdef RADEON_DEBUG_BO
- rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
- 0, R200_ELT_BUF_SZ, 4,
- RADEON_GEM_DOMAIN_GTT, 0, "ELT");
-#else
rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
0, R200_ELT_BUF_SZ, 4,
RADEON_GEM_DOMAIN_GTT, 0);
-#endif
rmesa->radeon.tcl.elt_dma_offset = 0;
rmesa->tcl.elt_used = min_nr * 2;
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index 9a92a320797..8cb287de268 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -500,3 +500,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
}
+void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+{
+ int i;
+ r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate;
+ if (rmesa)
+ {
+ for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
+ _math_matrix_dtr( &rmesa->TexGenMatrix[i] );
+ }
+ }
+ radeonDestroyContext(driContextPriv);
+}
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index bc871d99048..78ad5baebb3 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -515,7 +515,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
if (drb)
dwords += 6;
if (rrb)
- dwords += 6;
+ dwords += 8;
if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
dwords += 4;
@@ -546,7 +546,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
- OUT_BATCH(cbpitch);
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
@@ -563,7 +563,6 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
uint32_t dwords = atom->cmd_size;
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
- radeon_mipmap_level *lvl;
if (t && t->mt && !t->image_override)
dwords += 2;
@@ -591,7 +590,6 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
uint32_t dwords = atom->cmd_size;
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
- radeon_mipmap_level *lvl;
int hastexture = 1;
if (!r200->state.texture.unit[i].unitneeded)
@@ -774,7 +772,7 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
- for (i = 0; i < 5; i++)
+ for (i = 0; i < 6; i++)
if (rmesa->radeon.radeonScreen->kernel_mm)
rmesa->hw.tex[i].emit = tex_emit_cs;
else
@@ -786,7 +784,7 @@ void r200InitState( r200ContextPtr rmesa )
ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
- for (i = 0; i < 5; i++)
+ for (i = 0; i < 6; i++)
if (rmesa->radeon.radeonScreen->kernel_mm)
rmesa->hw.cube[i].emit = cube_emit_cs;
else
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.c b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
new file mode 120000
index 00000000000..79ad050e6b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.h b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
new file mode 120000
index 00000000000..83b0f7ffabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
new file mode 120000
index 00000000000..ca894b2443c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_chipset.h b/src/mesa/drivers/dri/r200/radeon_chipset.h
new file mode 120000
index 00000000000..eba99001ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cmdbuf.h b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
new file mode 120000
index 00000000000..a799e1dc6df
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.c b/src/mesa/drivers/dri/r200/radeon_common.c
new file mode 120000
index 00000000000..67b19ba940d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.h b/src/mesa/drivers/dri/r200/radeon_common.h
new file mode 120000
index 00000000000..5bcb696a9f7
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.c b/src/mesa/drivers/dri/r200/radeon_common_context.c
new file mode 120000
index 00000000000..86800f3819c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.h b/src/mesa/drivers/dri/r200/radeon_common_context.h
new file mode 120000
index 00000000000..4d663125500
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.c b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
new file mode 120000
index 00000000000..006720f8a46
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.h b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
new file mode 120000
index 00000000000..a5f95e0a3dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
new file mode 120000
index 00000000000..c248ea7d1a5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.c b/src/mesa/drivers/dri/r200/radeon_dma.c
new file mode 120000
index 00000000000..43be0006255
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.h b/src/mesa/drivers/dri/r200/radeon_dma.h
new file mode 120000
index 00000000000..82e50634e3c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_fbo.c b/src/mesa/drivers/dri/r200/radeon_fbo.c
new file mode 120000
index 00000000000..0d738d8d780
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.c b/src/mesa/drivers/dri/r200/radeon_lock.c
new file mode 120000
index 00000000000..af4108a8e30
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.h b/src/mesa/drivers/dri/r200/radeon_lock.h
new file mode 120000
index 00000000000..64bdf94ee7e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
new file mode 120000
index 00000000000..31c0cfbe942
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
new file mode 120000
index 00000000000..254d50cf8c5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.c b/src/mesa/drivers/dri/r200/radeon_screen.c
new file mode 120000
index 00000000000..86161118dd3
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.h b/src/mesa/drivers/dri/r200/radeon_screen.h
new file mode 120000
index 00000000000..23bb6bd4598
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.c b/src/mesa/drivers/dri/r200/radeon_span.c
new file mode 120000
index 00000000000..232868c4c9e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.h b/src/mesa/drivers/dri/r200/radeon_span.h
new file mode 120000
index 00000000000..f9d634508c2
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.c b/src/mesa/drivers/dri/r200/radeon_texture.c
new file mode 120000
index 00000000000..a822710915f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.h b/src/mesa/drivers/dri/r200/radeon_texture.h
new file mode 120000
index 00000000000..17fac3d5ea5
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon.h b/src/mesa/drivers/dri/r200/server/radeon.h
new file mode 120000
index 00000000000..81274a54f11
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_dri.c b/src/mesa/drivers/dri/r200/server/radeon_dri.c
new file mode 120000
index 00000000000..d05847d650f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_dri.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_dri.h b/src/mesa/drivers/dri/r200/server/radeon_dri.h
new file mode 120000
index 00000000000..27c591d3c9d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_egl.c b/src/mesa/drivers/dri/r200/server/radeon_egl.c
new file mode 120000
index 00000000000..d7735a76438
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_egl.c
@@ -0,0 +1 @@
+../../radeon/server/radeon_egl.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_macros.h b/src/mesa/drivers/dri/r200/server/radeon_macros.h
new file mode 120000
index 00000000000..c56cd735b83
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_reg.h b/src/mesa/drivers/dri/r200/server/radeon_reg.h
new file mode 120000
index 00000000000..e2349dcb685
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/.gitignore b/src/mesa/drivers/dri/r300/.gitignore
deleted file mode 100644
index 2f9cd1a987b..00000000000
--- a/src/mesa/drivers/dri/r300/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-radeon_bocs_wrapper.h
-radeon_bo_legacy.[ch]
-radeon_chipset.h
-radeon_cmdbuf.h
-radeon_common.[ch]
-radeon_common_context.[ch]
-radeon_cs_legacy.[ch]
-radeon_dma.[ch]
-radeon_fbo.c
-radeon_lock.[ch]
-radeon_mipmap_tree.[ch]
-radeon_screen.[ch]
-radeon_span.[ch]
-radeon_texture.[ch]
-server
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index a77209074ae..77b3d168f34 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -36,7 +36,8 @@ RADEON_COMMON_SOURCES = \
radeon_cs_legacy.c \
radeon_mipmap_tree.c \
radeon_span.c \
- radeon_fbo.c
+ radeon_fbo.c \
+ radeon_buffer_objects.c
DRIVER_SOURCES = \
radeon_screen.c \
@@ -48,20 +49,12 @@ DRIVER_SOURCES = \
r300_render.c \
r300_tex.c \
r300_texstate.c \
- radeon_program.c \
- radeon_program_alu.c \
- radeon_program_pair.c \
- radeon_nqssadce.c \
r300_vertprog.c \
r300_fragprog_common.c \
- r300_fragprog.c \
- r300_fragprog_swizzle.c \
- r300_fragprog_emit.c \
- r500_fragprog.c \
- r500_fragprog_emit.c \
r300_shader.c \
r300_emit.c \
r300_swtcl.c \
+ r300_queryobj.c \
$(RADEON_COMMON_SOURCES) \
$(EGL_SOURCES) \
$(CS_SOURCES)
@@ -73,54 +66,18 @@ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
# -DRADEON_BO_TRACK \
-Wall
-SYMLINKS = \
- server/radeon_dri.c \
- server/radeon_dri.h \
- server/radeon.h \
- server/radeon_macros.h \
- server/radeon_reg.h \
- server/radeon_egl.c
-
-COMMON_SYMLINKS = \
- radeon_chipset.h \
- radeon_screen.c \
- radeon_screen.h \
- radeon_span.h \
- radeon_span.c \
- radeon_bo_legacy.c \
- radeon_cs_legacy.c \
- radeon_bo_legacy.h \
- radeon_cs_legacy.h \
- radeon_bocs_wrapper.h \
- radeon_lock.c \
- radeon_lock.h \
- radeon_common.c \
- radeon_common.h \
- radeon_common_context.c \
- radeon_common_context.h \
- radeon_cmdbuf.h \
- radeon_dma.c \
- radeon_dma.h \
- radeon_mipmap_tree.c \
- radeon_mipmap_tree.h \
- radeon_texture.c \
- radeon_texture.h \
- radeon_fbo.c \
- $(CS_SOURCES)
-
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+PIPE_DRIVERS = compiler/libr300compiler.a
+
##### TARGETS #####
include ../Makefile.template
-server:
- mkdir -p server
-
-$(SYMLINKS): server
- @[ -e $@ ] || ln -sf ../../radeon/$@ server/
+symlinks:
-$(COMMON_SYMLINKS):
- @[ -e $@ ] || ln -sf ../radeon/$@ ./
+# Mark the archive phony so that we always check for recompilation
+.PHONY : compiler/libr300compiler.a
-symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)
+compiler/libr300compiler.a:
+ cd compiler && $(MAKE)
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
new file mode 100644
index 00000000000..d9738441928
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -0,0 +1,75 @@
+# src/mesa/drivers/dri/r300/compiler/Makefile
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300compiler
+
+C_SOURCES = \
+ radeon_code.c \
+ radeon_compiler.c \
+ radeon_nqssadce.c \
+ radeon_program.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ r3xx_fragprog.c \
+ r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
+ r3xx_vertprog.c \
+ r3xx_vertprog_dump.c \
+ \
+ memory_pool.c
+
+
+### Basic defines ###
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+INCLUDES = \
+ -I. \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current
+ $(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target
+install:
+ @echo -n ""
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
new file mode 100644
index 00000000000..37aa2b65798
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 4
+
+
+struct memory_block {
+ struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+ memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+ while(pool->blocks) {
+ struct memory_block * block = pool->blocks;
+ pool->blocks = block->next;
+ free(block);
+ }
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+ unsigned int blocksize = pool->total_allocated;
+ struct memory_block * newblock;
+
+ if (!blocksize)
+ blocksize = 2*POOL_LARGE_ALLOC;
+
+ newblock = (struct memory_block*)malloc(blocksize);
+ newblock->next = pool->blocks;
+ pool->blocks = newblock;
+
+ pool->head = (unsigned char*)(newblock + 1);
+ pool->end = ((unsigned char*)newblock) + blocksize;
+ pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+ if (bytes < POOL_LARGE_ALLOC) {
+ if (pool->head + bytes > pool->end)
+ refill_pool(pool);
+
+ assert(pool->head + bytes <= pool->end);
+
+ void * ptr = pool->head;
+
+ pool->head += bytes;
+ pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+ return ptr;
+ } else {
+ struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+ block->next = pool->blocks;
+ pool->blocks = block;
+
+ return (block + 1);
+ }
+}
+
+
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
new file mode 100644
index 00000000000..ce23c319ad3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+ unsigned char * head;
+ unsigned char * end;
+ unsigned int total_allocated;
+ struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
new file mode 100644
index 00000000000..6c9fba49146
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include "shader/prog_parameter.h"
+
+#include "../r300_reg.h"
+
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+{
+ struct prog_src_register reg = { 0, };
+
+ reg.File = PROGRAM_STATE_VAR;
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = SWIZZLE_WWWW;
+ return reg;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ */
+GLboolean r300_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
+ inst->I.Opcode = OPCODE_MOV;
+
+ if (comparefunc == GL_ALWAYS) {
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ } else {
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+
+ return GL_TRUE;
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ }
+ }
+
+ /* Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
+ */
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->I.Opcode = OPCODE_MUL;
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
+ }
+
+ /* Cannot write texture to output registers or with masks */
+ if (inst->I.Opcode != OPCODE_KIL &&
+ (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ }
+
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ }
+
+ return GL_TRUE;
+}
+
+/* just some random things... */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r300_fragment_program_code *code = &c->code.r300;
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n <= (code->config & 3); n++) {
+ uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+ int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+ int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+ int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+ if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+ fprintf(stderr, " TEX:\n");
+ for (i = tex_offset;
+ i <= tex_offset + tex_end;
+ ++i) {
+ const char *instr;
+
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
+ 15) {
+ case R300_TEX_OP_LD:
+ instr = "TEX";
+ break;
+ case R300_TEX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_TEX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_TEX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+ 't',
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
+ }
+ }
+
+ for (i = alu_offset;
+ i <= alu_offset + alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+ int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(dstc, "t%i.%s ",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ }
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(tmp, "o%i.%s",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ strcat(dstc, tmp);
+ }
+
+ dsta[0] = 0;
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+ sprintf(dsta, "t%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+ sprintf(tmp, "o%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ strcat(dsta, tmp);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ code->alu.inst[i].rgb_addr, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].alpha_addr);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+ int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_ALU_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ code->alu.inst[i].rgb_inst, arga[0], arga[1],
+ arga[2], code->alu.inst[i].alpha_inst);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
new file mode 100644
index 00000000000..0ac46dbd9cb
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index b75656e7ee1..305dc074ee8 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -40,57 +40,43 @@
#include "r300_fragprog.h"
+#include "../r300_reg.h"
+
#include "radeon_program_pair.h"
#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
+struct r300_emit_state {
+ struct r300_fragment_program_compiler * compiler;
+
+ unsigned current_node : 2;
+ unsigned node_first_tex : 8;
+ unsigned node_first_alu : 8;
+ uint32_t node_flags;
+};
+
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
- struct r300_fragment_program_code *code = &c->code->r300
+ struct r300_emit_state * emit = (struct r300_emit_state*)data; \
+ struct r300_fragment_program_compiler *c = emit->compiler; \
+ struct r300_fragment_program_code *code = &c->code->code.r300
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
-static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == index)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R300_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = index;
- }
-
- return GL_TRUE;
-}
-
-
/**
* Mark a temporary register as used.
*/
static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
{
- if (index > code->max_temp_idx)
- code->max_temp_idx = index;
+ if (index > code->pixsize)
+ code->pixsize = index;
}
-static GLuint translate_rgb_opcode(GLuint opcode)
+static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTC_CMP;
@@ -109,7 +95,7 @@ static GLuint translate_rgb_opcode(GLuint opcode)
}
}
-static GLuint translate_alpha_opcode(GLuint opcode)
+static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTA_CMP;
@@ -145,63 +131,62 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
int ip = code->alu.length++;
int j;
- code->node[code->cur_node].alu_end++;
- code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
- code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
+ code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
if (!inst->RGB.Src[j].Constant)
use_temporary(code, inst->RGB.Src[j].Index);
- code->alu.inst[ip].inst1 |= src << (6*j);
+ code->alu.inst[ip].rgb_addr |= src << (6*j);
src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
if (!inst->Alpha.Src[j].Constant)
use_temporary(code, inst->Alpha.Src[j].Index);
- code->alu.inst[ip].inst3 |= src << (6*j);
+ code->alu.inst[ip].alpha_addr |= src << (6*j);
GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
- code->alu.inst[ip].inst0 |= arg << (7*j);
+ code->alu.inst[ip].rgb_inst |= arg << (7*j);
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
- code->alu.inst[ip].inst2 |= arg << (7*j);
+ code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
if (inst->RGB.Saturate)
- code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+ code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
- code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+ code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
- code->alu.inst[ip].inst1 |=
+ code->alu.inst[ip].rgb_addr |=
(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
- code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
- code->alu.inst[ip].inst3 |=
+ code->alu.inst[ip].alpha_addr |=
(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
- code->node[code->cur_node].flags |= R300_RGBA_OUT;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT;
+ emit->node_flags |= R300_RGBA_OUT;
}
if (inst->Alpha.DepthWriteMask) {
- code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
- code->node[code->cur_node].flags |= R300_W_OUT;
- c->fp->writes_depth = GL_TRUE;
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+ emit->node_flags |= R300_W_OUT;
+ c->code->writes_depth = GL_TRUE;
}
return GL_TRUE;
@@ -211,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
/**
* Finish the current node without advancing to the next one.
*/
-static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+static GLboolean finish_node(struct r300_emit_state * emit)
{
- struct r300_fragment_program_code *code = &c->code->r300;
- struct r300_fragment_program_node *node = &code->node[code->cur_node];
+ struct r300_fragment_program_compiler * c = emit->compiler;
+ struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
- if (node->alu_end < 0) {
+ if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct radeon_pair_instruction inst;
_mesa_bzero(&inst, sizeof(inst));
- if (!emit_alu(c, &inst))
+ if (!emit_alu(emit, &inst))
return GL_FALSE;
}
- if (node->tex_end < 0) {
- if (code->cur_node == 0) {
- node->tex_end = 0;
- } else {
- error("Node %i has no TEX instructions", code->cur_node);
+ unsigned alu_offset = emit->node_first_alu;
+ unsigned alu_end = code->alu.length - alu_offset - 1;
+ unsigned tex_offset = emit->node_first_tex;
+ unsigned tex_end = code->tex.length - tex_offset - 1;
+
+ if (code->tex.length == emit->node_first_tex) {
+ if (emit->current_node > 0) {
+ error("Node %i has no TEX instructions", emit->current_node);
return GL_FALSE;
}
+
+ tex_end = 0;
} else {
- if (code->cur_node == 0)
- code->first_node_has_tex = 1;
+ if (emit->current_node == 0)
+ code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
}
+ /* Write the config register.
+ * Note: The order in which the words for each node are written
+ * is not correct here and needs to be fixed up once we're entirely
+ * done
+ *
+ * Also note that the register specification from AMD is slightly
+ * incorrect in its description of this register. */
+ code->code_addr[emit->current_node] =
+ (alu_offset << R300_ALU_START_SHIFT) |
+ (alu_end << R300_ALU_SIZE_SHIFT) |
+ (tex_offset << R300_TEX_START_SHIFT) |
+ (tex_end << R300_TEX_SIZE_SHIFT) |
+ emit->node_flags;
+
return GL_TRUE;
}
@@ -248,30 +252,28 @@ static GLboolean begin_tex(void* data)
{
PROG_CODE;
- if (code->cur_node == 0) {
- if (code->node[0].alu_end < 0 &&
- code->node[0].tex_end < 0)
- return GL_TRUE;
+ if (code->alu.length == emit->node_first_alu &&
+ code->tex.length == emit->node_first_tex) {
+ return GL_TRUE;
}
- if (code->cur_node == 3) {
+ if (emit->current_node == 3) {
error("Too many texture indirections");
return GL_FALSE;
}
- if (!finish_node(c))
+ if (!finish_node(emit))
return GL_FALSE;
- struct r300_fragment_program_node *node = &code->node[++code->cur_node];
- node->alu_offset = code->alu.length;
- node->alu_end = -1;
- node->tex_offset = code->tex.length;
- node->tex_end = -1;
+ emit->current_node++;
+ emit->node_first_tex = code->tex.length;
+ emit->node_first_alu = code->alu.length;
+ emit->node_flags = 0;
return GL_TRUE;
}
-static GLboolean emit_tex(void* data, struct prog_instruction* inst)
+static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
{
PROG_CODE;
@@ -281,31 +283,30 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst)
}
GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DstReg.Index;
+ GLuint dest = inst->DestIndex;
GLuint opcode;
switch(inst->Opcode) {
- case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %i", inst->Opcode);
return GL_FALSE;
}
- if (inst->Opcode == OPCODE_KIL) {
+ if (inst->Opcode == RADEON_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
- use_temporary(code, inst->SrcReg[0].Index);
+ use_temporary(code, inst->SrcIndex);
- code->node[code->cur_node].tex_end++;
code->tex.inst[code->tex.length++] =
- (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
@@ -314,7 +315,6 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst)
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = &emit_const,
.EmitPaired = &emit_alu,
.EmitTex = &emit_tex,
.BeginTexBlock = &begin_tex,
@@ -325,20 +325,36 @@ static const struct radeon_pair_handler pair_handler = {
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
*/
-GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
- struct r300_fragment_program_code *code = &compiler->code->r300;
-
- _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- code->node[0].alu_end = -1;
- code->node[0].tex_end = -1;
+ struct r300_emit_state emit;
+ struct r300_fragment_program_code *code = &compiler->code->code.r300;
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
+ memset(&emit, 0, sizeof(emit));
+ emit.compiler = compiler;
- if (!finish_node(compiler))
- return GL_FALSE;
+ _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
- return GL_TRUE;
+ radeonPairProgram(compiler, &pair_handler, &emit);
+ if (compiler->Base.Error)
+ return;
+
+ /* Finish the program */
+ finish_node(&emit);
+
+ code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+ code->code_offset =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ if (emit.current_node < 3) {
+ int shift = 3 - emit.current_node;
+ int i;
+ for(i = 0; i <= emit.current_node; ++i)
+ code->code_addr[shift + i] = code->code_addr[i];
+ for(i = 0; i < shift; ++i)
+ code->code_addr[i] = 0;
+ }
}
-
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index fc9d855bce6..1b14cc3888b 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -33,8 +33,9 @@
#include "r300_fragprog_swizzle.h"
-#include "r300_reg.h"
+#include "../r300_reg.h"
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
@@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
}
}
- struct prog_instruction *inst;
-
- _mesa_insert_instructions(s->Program, s->IP, 1);
- inst = s->Program->Instructions + s->IP++;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
/* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
- dst.WriteMask &= ~inst->DstReg.WriteMask;
+ dst.WriteMask &= ~inst->I.DstReg.WriteMask;
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
index 231bf4eef5f..231bf4eef5f 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 00000000000..d39b82be71c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program_alu.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void nqssadce_init(struct nqssadce_state* s)
+{
+ struct r300_fragment_program_compiler * c = s->UserData;
+ s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW;
+ s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W;
+}
+
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
+{
+ struct rc_instruction *rci;
+
+ for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction * inst = &rci->I;
+
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ continue;
+
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+ }
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+ rewrite_depth_out(c);
+
+ if (c->is_r500) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(&c->Base, 4, transformations);
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_TEX, c },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&c->Base, 3, transformations);
+ }
+
+ if (c->Base.Debug) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stdout);
+ }
+
+ if (c->is_r500) {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r500FPIsNativeSwizzle,
+ .BuildSwizzle = &r500FPBuildSwizzle
+ };
+ radeonNqssaDce(&c->Base, &nqssadce, c);
+ } else {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle
+ };
+ radeonNqssaDce(&c->Base, &nqssadce, c);
+ }
+
+ if (c->Base.Debug) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stdout);
+ }
+
+ if (c->is_r500) {
+ r500BuildFragmentProgramHwCode(c);
+ } else {
+ r300BuildFragmentProgramHwCode(c);
+ }
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ if (c->Base.Debug) {
+ if (c->is_r500) {
+ r500FragmentProgramDump(c->code);
+ } else {
+ r300FragmentProgramDump(c->code);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 00000000000..fc9c8f805ae
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,615 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_nqssadce.h"
+#include "radeon_program.h"
+#include "radeon_program_alu.h"
+
+#include "shader/prog_print.h"
+
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(vpi->SrcReg[x].File), \
+ NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(GLuint mask)
+{
+ /* WRITEMASK_* is equivalent to VSF_FLAG_* */
+ return mask & WRITEMASK_XYZW;
+}
+
+static unsigned long t_dst_class(gl_register_file file)
+{
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return PVS_DST_REG_TEMPORARY;
+ case PROGRAM_OUTPUT:
+ return PVS_DST_REG_OUT;
+ case PROGRAM_ADDRESS:
+ return PVS_DST_REG_A0;
+ /*
+ case PROGRAM_INPUT:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(gl_register_file file)
+{
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ return PVS_SRC_REG_TEMPORARY;
+ case PROGRAM_INPUT:
+ return PVS_SRC_REG_INPUT;
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_STATE_VAR:
+ return PVS_SRC_REG_CONSTANT;
+ /*
+ case PROGRAM_OUTPUT:
+ case PROGRAM_WRITE_ONLY:
+ case PROGRAM_ADDRESS:
+ */
+ default:
+ fprintf(stderr, "problem in %s", __FUNCTION__);
+ _mesa_exit(-1);
+ return -1;
+ }
+}
+
+static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+{
+ unsigned long aclass = t_src_class(a.File);
+ unsigned long bclass = t_src_class(b.File);
+
+ if (aclass != bclass)
+ return GL_FALSE;
+ if (aclass == PVS_SRC_REG_TEMPORARY)
+ return GL_FALSE;
+
+ if (a.RelAddr || b.RelAddr)
+ return GL_TRUE;
+ if (a.Index != b.Index)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
+{
+ /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ if (src->File == PROGRAM_INPUT) {
+ assert(vp->inputs[src->Index] != -1);
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->Negate) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+ struct prog_src_register *src)
+{
+ /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (src->RelAddr << 4);
+}
+
+static GLboolean valid_dst(struct r300_vertex_program_code *vp,
+ struct prog_dst_register *dst)
+{
+ if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return GL_FALSE;
+ } else if (dst->File == PROGRAM_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return GL_TRUE;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_FALSE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = __CONST(1, SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+ GLuint hw_opcode,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = __CONST(0, SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ GL_FALSE,
+ GL_TRUE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+ struct prog_instruction *vpi,
+ GLuint * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ GL_TRUE,
+ GL_FALSE,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *rci;
+
+ compiler->code->pos_end = 0; /* Not supported yet */
+ compiler->code->length = 0;
+
+ compiler->SetHwInputOutput(compiler);
+
+ for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+ struct prog_instruction *vpi = &rci->I;
+ GLuint *inst = compiler->code->body.d + compiler->code->length;
+
+ /* Skip instructions writing to non-existing destination */
+ if (!valid_dst(compiler->code, &vpi->DstReg))
+ continue;
+
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+ return;
+ }
+
+ switch (vpi->Opcode) {
+ case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ default:
+ rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
+ return;
+ }
+
+ compiler->code->length += 4;
+
+ if (compiler->Base.Error)
+ return;
+ }
+}
+
+struct temporary_allocation {
+ GLuint Allocated:1;
+ GLuint HwTemp:15;
+ struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *inst;
+ GLuint num_orig_temps = 0;
+ GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ struct temporary_allocation * ta;
+ GLuint i, j;
+
+ compiler->code->num_temporaries = 0;
+ memset(hwtemps, 0, sizeof(hwtemps));
+
+ /* Pass 1: Count original temporaries and allocate structures */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if (inst->I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->I.DstReg.Index + 1;
+ }
+ }
+ }
+
+ ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+ sizeof(struct temporary_allocation) * num_orig_temps);
+ memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+ /* Pass 2: Determine original temporary lifetimes */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
+ ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ }
+ }
+
+ /* Pass 3: Register allocation */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ for (i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.SrcReg[i].Index;
+ inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ }
+ }
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+ GLuint orig = inst->I.DstReg.Index;
+
+ if (!ta[orig].Allocated) {
+ for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ if (!hwtemps[j])
+ break;
+ }
+ if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Out of hw temporaries\n");
+ } else {
+ ta[orig].Allocated = GL_TRUE;
+ ta[orig].HwTemp = j;
+ hwtemps[j] = GL_TRUE;
+
+ if (j >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = j + 1;
+ }
+ }
+
+ inst->I.DstReg.Index = ta[orig].HwTemp;
+ }
+ }
+ }
+}
+
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static GLboolean transform_source_conflicts(
+ struct radeon_compiler *c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+
+ if (num_operands == 3) {
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
+ || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
+
+ reset_srcreg(&inst->I.SrcReg[2]);
+ inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[2].Index = tmpreg;
+ }
+ }
+
+ if (num_operands >= 2) {
+ if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = tmpreg;
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
+
+ reset_srcreg(&inst->I.SrcReg[1]);
+ inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[1].Index = tmpreg;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if ((compiler->RequiredOutputs & (1 << i)) &&
+ !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+ struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = i;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->I.SrcReg[0].Index = 0;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ compiler->Base.Program.OutputsWritten |= 1 << i;
+ }
+ }
+}
+
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ struct r300_vertex_program_compiler * compiler = s->UserData;
+ int i;
+
+ for(i = 0; i < VERT_RESULT_MAX; ++i) {
+ if (compiler->RequiredOutputs & (1 << i))
+ s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ }
+}
+
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+{
+ (void) opcode;
+ (void) reg;
+
+ return GL_TRUE;
+}
+
+
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+{
+ addArtificialOutputs(compiler);
+
+ {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stdout);
+ }
+
+ {
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * will not be treated properly.
+ */
+ struct radeon_program_transformation transformations[] = {
+ { &transform_source_conflicts, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after source conflict resolve:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stdout);
+ }
+
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+
+ /* We need this step for reusing temporary registers */
+ allocate_temporary_registers(compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stdout);
+ }
+ }
+
+ translate_vertex_program(compiler);
+
+ rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+ compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+ compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ printf("Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 00000000000..39cc6953ba5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+ printf(" dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ printf("PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ printf(" PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ printf("%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ printf("%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+ printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+ unsigned instrcount = vs->length / 4;
+ unsigned i;
+
+ for(i = 0; i < instrcount; i++) {
+ unsigned offset = i*4;
+ unsigned src;
+
+ printf("%d: op: 0x%08x", i, vs->body.d[offset]);
+ r300_vs_op_dump(vs->body.d[offset]);
+
+ for(src = 0; src < 3; ++src) {
+ printf(" src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+ r300_vs_src_dump(vs->body.d[offset+1+src]);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 4d58cf21622..7e2faed6908 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -27,158 +27,141 @@
#include "r500_fragprog.h"
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
+#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
struct prog_src_register reg = { 0, };
- fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
+ reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
*/
GLboolean r500_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
+
+ if (inst->I.Opcode != OPCODE_TEX &&
+ inst->I.Opcode != OPCODE_TXB &&
+ inst->I.Opcode != OPCODE_TXP &&
+ inst->I.Opcode != OPCODE_KIL)
return GL_FALSE;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
+ if (inst->I.Opcode != OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
+ inst->I.Opcode = OPCODE_MOV;
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
+ inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
+ inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
}
+
return GL_TRUE;
+ } else {
+ GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ int pass, fail;
+
+ inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ inst_cmp->I.DstReg = inst->I.DstReg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = rc_find_free_temporary(c);
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+
+ inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
+ inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
+ inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ if (depthmode == 0) /* GL_LUMINANCE */
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ else if (depthmode == 2) /* GL_ALPHA */
+ inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+
+ /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
+ * r < tex <=> -tex+r < 0
+ * r >= tex <=> not (-tex+r < 0 */
+ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
+ inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ else
+ inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+
+ inst_cmp->I.Opcode = OPCODE_CMP;
+ /* DstReg has been filled out above */
+ inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+
+ if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
+ inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
+ inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
}
+ }
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
- int tempreg = radeonFindFreeTemporary(t);
+ /* Cannot write texture to output registers */
+ if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- }
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg = inst->I.DstReg;
+ inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
+ /* Cannot read texture coordinate from constants file */
+ if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
+ inst_mov->I.Opcode = OPCODE_MOV;
+ inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
+ reset_srcreg(&inst->I.SrcReg[0]);
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
}
return GL_TRUE;
@@ -249,7 +232,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
*/
void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
{
- struct prog_instruction *inst;
GLuint negatebase[2] = { 0, 0 };
int i;
@@ -260,20 +242,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
- _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
- inst = s->Program->Instructions + s->IP;
-
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- inst->Opcode = OPCODE_MOV;
- inst->DstReg = dst;
- inst->DstReg.WriteMask = negatebase[i];
- inst->SrcReg[0] = src;
- inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
- inst++;
- s->IP++;
+ struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg = dst;
+ inst->I.DstReg.WriteMask = negatebase[i];
+ inst->I.SrcReg[0] = src;
+ inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
}
}
@@ -375,9 +353,9 @@ static char *to_texop(int val)
return NULL;
}
-void r500FragmentProgramDump(union rX00_fragment_program_code *c)
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
{
- struct r500_fragment_program_code *code = &c->r500;
+ struct r500_fragment_program_code *code = &c->code.r500;
fprintf(stderr, "R500 Fragment Program:\n--------\n");
int n;
@@ -385,15 +363,6 @@ void r500FragmentProgramDump(union rX00_fragment_program_code *c)
uint32_t inst0;
char *str = NULL;
- if (code->const_nr) {
- fprintf(stderr, "--------\nConstants:\n");
- for (n = 0; n < code->const_nr; n++) {
- fprintf(stderr, "Constant %d: %i[%i]\n", n,
- code->constant[n].File, code->constant[n].Index);
- }
- fprintf(stderr, "--------\n");
- }
-
for (n = 0; n < code->inst_end+1; n++) {
inst0 = inst = code->inst[n].inst0;
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 1179bf66073..9091f65cd20 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -36,17 +36,20 @@
#include "shader/prog_parameter.h"
#include "shader/prog_instruction.h"
-#include "r300_context.h"
+#include "radeon_compiler.h"
#include "radeon_nqssadce.h"
-extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
-extern void r500FragmentProgramDump(union rX00_fragment_program_code *c);
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
-extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
+extern GLboolean r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
#endif
diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 30f4514897e..d694725c9bb 100644
--- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -45,47 +45,22 @@
#include "r500_fragprog.h"
+#include "../r300_reg.h"
+
#include "radeon_program_pair.h"
#define PROG_CODE \
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
- struct r500_fragment_program_code *code = &c->code->r500
+ struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
- fprintf(stderr, "%s::%s(): " fmt "\n", \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
-/**
- * Callback to register hardware constants.
- */
-static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
-{
- PROG_CODE;
-
- for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
- if (code->constant[*hwindex].File == file &&
- code->constant[*hwindex].Index == idx)
- break;
- }
-
- if (*hwindex >= code->const_nr) {
- if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
- error("Out of hw constants!\n");
- return GL_FALSE;
- }
-
- code->const_nr++;
- code->constant[*hwindex].File = file;
- code->constant[*hwindex].Index = idx;
- }
-
- return GL_TRUE;
-}
-
-static GLuint translate_rgb_op(GLuint opcode)
+static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
@@ -106,7 +81,7 @@ static GLuint translate_rgb_op(GLuint opcode)
}
}
-static GLuint translate_alpha_op(GLuint opcode)
+static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALPHA_OP_CMP;
@@ -189,8 +164,8 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
int ip = ++code->inst_end;
- code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
- code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
+ code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
@@ -202,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->fp->writes_depth = GL_TRUE;
+ c->code->writes_depth = GL_TRUE;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
@@ -234,19 +209,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
return GL_TRUE;
}
-static GLuint translate_strq_swizzle(struct prog_src_register src)
+static GLuint translate_strq_swizzle(GLuint swizzle)
{
GLuint swiz = 0;
int i;
for (i = 0; i < 4; i++)
- swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
+ swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
return swiz;
}
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct prog_instruction *inst)
+static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
{
PROG_CODE;
@@ -258,7 +233,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->DstReg.WriteMask << 11)
+ | (inst->WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
@@ -267,25 +242,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case OPCODE_KIL:
+ case RADEON_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case OPCODE_TEX:
+ case RADEON_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case OPCODE_TXB:
+ case RADEON_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case OPCODE_TXP:
+ case RADEON_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
- | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
- | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
+ | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DestIndex)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
@@ -293,35 +268,32 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst)
}
static const struct radeon_pair_handler pair_handler = {
- .EmitConst = emit_const,
.EmitPaired = emit_paired,
.EmitTex = emit_tex,
.MaxHwTemps = 128
};
-GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
- struct r500_fragment_program_code *code = &compiler->code->r500;
+ struct r500_fragment_program_code *code = &compiler->code->code.r500;
_mesa_bzero(code, sizeof(*code));
code->max_temp_idx = 1;
- code->inst_offset = 0;
code->inst_end = -1;
- if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
- return GL_FALSE;
+ radeonPairProgram(compiler, &pair_handler, compiler);
+ if (compiler->Base.Error)
+ return;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
- error("Introducing fake OUT: Too many instructions");
- return GL_FALSE;
+ rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+ return;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
-
- return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
new file mode 100644
index 00000000000..c7923004df9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "radeon_code.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+ memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+ dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+ memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+ dst->Count = src->Count;
+ dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+ free(c->Constants);
+ memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+ unsigned index = c->Count;
+
+ if (c->Count >= c->_Reserved) {
+ struct rc_constant * newlist;
+
+ c->_Reserved = c->_Reserved * 2;
+ if (!c->_Reserved)
+ c->_Reserved = 16;
+
+ newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+ memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+ free(c->Constants);
+ c->Constants = newlist;
+ }
+
+ c->Constants[index] = *constant;
+ c->Count++;
+
+ return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+ if (c->Constants[index].u.State[0] == state0 &&
+ c->Constants[index].u.State[1] == state1)
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_STATE;
+ constant.Size = 4;
+ constant.u.State[0] = state0;
+ constant.u.State[1] = state1;
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+ unsigned index;
+ int free_index = -1;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) {
+ if (c->Constants[index].u.Immediate[comp] == data) {
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return index;
+ }
+ }
+
+ if (c->Constants[index].Size < 4)
+ free_index = index;
+ }
+ }
+
+ if (free_index >= 0) {
+ unsigned comp = c->Constants[free_index].Size++;
+ c->Constants[free_index].u.Immediate[comp] = data;
+ *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ return free_index;
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 1;
+ constant.u.Immediate[0] = data;
+ *swizzle = SWIZZLE_XXXX;
+
+ return rc_constants_add(c, &constant);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
new file mode 100644
index 00000000000..0806fb1b5c6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+ /**
+ * External constants are constants whose meaning is unknown to this
+ * compiler. For example, a Mesa gl_program's constants are turned
+ * into external constants.
+ */
+ RC_CONSTANT_EXTERNAL = 0,
+
+ RC_CONSTANT_IMMEDIATE,
+
+ /**
+ * Constant referring to state that is known by this compiler,
+ * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+ */
+ RC_CONSTANT_STATE
+};
+
+enum {
+ RC_STATE_SHADOW_AMBIENT = 0,
+
+ RC_STATE_R300_WINDOW_DIMENSION,
+ RC_STATE_R300_TEXRECT_FACTOR
+};
+
+struct rc_constant {
+ unsigned Type:2; /**< RC_CONSTANT_xxx */
+ unsigned Size:3;
+
+ union {
+ unsigned External;
+ float Immediate[4];
+ unsigned State[2];
+ } u;
+};
+
+struct rc_constant_list {
+ struct rc_constant * Constants;
+ unsigned Count;
+
+ unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ unsigned depth_texture_mode : 2;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is (texture_compare_func - GL_NEVER).
+ * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ *
+ * Otherwise, this field is 0.
+ */
+ unsigned texture_compare_func : 3;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ uint32_t inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ uint32_t rgb_inst;
+ uint32_t rgb_addr;
+ uint32_t alpha_inst;
+ uint32_t alpha_addr;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ uint32_t config; /* US_CONFIG */
+ uint32_t pixsize; /* US_PIXSIZE */
+ uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t code_addr[4]; /* US_CODE_ADDR */
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+ int max_temp_idx;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ unsigned writes_depth:1;
+
+ struct rc_constant_list constants;
+};
+
+
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+ int length;
+ union {
+ uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ } body;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int inputs[VSF_MAX_INPUTS];
+ int outputs[VSF_MAX_OUTPUTS];
+
+ struct rc_constant_list constants;
+
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+};
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
+#endif /* RADEON_CODE_H */ \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
new file mode 100644
index 00000000000..da950d52894
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+
+#include "radeon_program.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+ memset(c, 0, sizeof(*c));
+
+ memory_pool_init(&c->Pool);
+ c->Program.Instructions.Prev = &c->Program.Instructions;
+ c->Program.Instructions.Next = &c->Program.Instructions;
+ c->Program.Instructions.I.Opcode = OPCODE_END;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+ rc_constants_destroy(&c->Program.Constants);
+ memory_pool_destroy(&c->Pool);
+ free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ if (!c->Debug)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ c->Error = GL_TRUE;
+
+ if (!c->ErrorMsg) {
+ /* Only remember the first error */
+ char buf[1024];
+ int written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (written < sizeof(buf)) {
+ c->ErrorMsg = strdup(buf);
+ } else {
+ c->ErrorMsg = malloc(written + 1);
+
+ va_start(ap, fmt);
+ vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+ va_end(ap);
+ }
+ }
+
+ if (c->Debug) {
+ fprintf(stderr, "r300compiler error: ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input)
+{
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << input);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; ++i) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) {
+ inst->I.SrcReg[i].File = new_input.File;
+ inst->I.SrcReg[i].Index = new_input.Index;
+ inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle);
+ if (!inst->I.SrcReg[i].Abs) {
+ inst->I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->I.SrcReg[i].Abs = new_input.Abs;
+ }
+
+ c->Program.InputsRead |= 1 << new_input.Index;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+ struct rc_instruction * inst;
+
+ c->Program.OutputsWritten &= ~(1 << output);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.Index = new_output;
+ inst->I.DstReg.WriteMask &= writemask;
+
+ c->Program.OutputsWritten |= 1 << new_output;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+ unsigned tempreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+ if (numdsts) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = tempreg;
+ }
+ }
+ }
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->I.Opcode = OPCODE_MOV;
+ inst->I.DstReg.File = PROGRAM_OUTPUT;
+ inst->I.DstReg.Index = dup_output;
+
+ inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[0].Index = tempreg;
+ inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+
+ c->Program.InputsRead &= ~(1 << wpos);
+ c->Program.InputsRead |= 1 << new_input;
+
+ /* perspective divide */
+ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_rcp->I.Opcode = OPCODE_RCP;
+
+ inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_rcp->I.DstReg.Index = tempregi;
+ inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+
+ inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_rcp->I.SrcReg[0].Index = new_input;
+ inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+
+ struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp);
+ inst_mul->I.Opcode = OPCODE_MUL;
+
+ inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mul->I.DstReg.Index = tempregi;
+ inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mul->I.SrcReg[0].File = PROGRAM_INPUT;
+ inst_mul->I.SrcReg[0].Index = new_input;
+
+ inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst_mul->I.SrcReg[1].Index = tempregi;
+ inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+
+ /* viewport transformation */
+ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->I.Opcode = OPCODE_MAD;
+
+ inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst_mad->I.DstReg.Index = tempregi;
+ inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ;
+
+ inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst_mad->I.SrcReg[0].Index = tempregi;
+ inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR;
+ inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index;
+ inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+
+ struct rc_instruction * inst;
+ for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < numsrcs; i++) {
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT &&
+ inst->I.SrcReg[i].Index == wpos) {
+ inst->I.SrcReg[i].File = PROGRAM_TEMPORARY;
+ inst->I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
new file mode 100644
index 00000000000..e63ab8840ab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+ struct prog_instruction I;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
+};
+
+struct radeon_compiler {
+ struct memory_pool Pool;
+ struct rc_program Program;
+ unsigned Debug:1;
+ unsigned Error:1;
+ char * ErrorMsg;
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
+
+struct r300_fragment_program_compiler {
+ struct radeon_compiler Base;
+ struct rX00_fragment_program_code *code;
+ struct r300_fragment_program_external_state state;
+ unsigned is_r500;
+ unsigned OutputDepth;
+ unsigned OutputColor;
+
+ void * UserData;
+ void (*AllocateHwInputs)(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+
+struct r300_vertex_program_compiler {
+ struct radeon_compiler Base;
+ struct r300_vertex_program_code *code;
+ GLbitfield RequiredOutputs;
+
+ void * UserData;
+ void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
index 840c9733b16..aaaa50ad1f9 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
@@ -36,6 +36,8 @@
#include "radeon_nqssadce.h"
+#include "radeon_compiler.h"
+
/**
* Return the @ref register_state for the given register (or 0 for untracked
@@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s
}
-static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
- struct prog_instruction *inst, GLint src, GLuint sourced)
+static void track_used_srcreg(struct nqssadce_state* s,
+ GLint src, GLuint sourced)
{
+ struct prog_instruction * inst = &s->IP->I;
int i;
GLuint deswz_source = 0;
@@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
struct prog_dst_register dstreg = inst->DstReg;
dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+ dstreg.Index = rc_find_free_temporary(s->Compiler);
dstreg.WriteMask = sourced;
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
- inst = s->Program->Instructions + s->IP;
inst->SrcReg[src].File = PROGRAM_TEMPORARY;
inst->SrcReg[src].Index = dstreg.Index;
inst->SrcReg[src].Swizzle = 0;
@@ -117,37 +119,36 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
struct register_state *regstate;
- if (inst->SrcReg[src].RelAddr)
+ if (inst->SrcReg[src].RelAddr) {
regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
- else
+ if (regstate)
+ regstate->Sourced |= WRITEMASK_X;
+ } else {
regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
-
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
-
- return inst;
+ if (regstate)
+ regstate->Sourced |= deswz_source & 0xf;
+ }
}
-static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
{
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
int i;
for(i = 0; i < nsrc; ++i)
- if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
- inst->SrcReg[i].Index = newindex;
+ if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
+ inst->I.SrcReg[i].Index = newindex;
}
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
- GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
- int ip;
- for(ip = 0; ip < s->IP; ++ip) {
- struct prog_instruction* inst = s->Program->Instructions + ip;
- if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
- inst->DstReg.Index = newindex;
+ GLuint newindex = rc_find_free_temporary(s->Compiler);
+ struct rc_instruction * inst;
+ for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
+ if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
+ inst->I.DstReg.Index = newindex;
unalias_srcregs(inst, oldindex, newindex);
}
- unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+ unalias_srcregs(s->IP, oldindex, newindex);
}
@@ -156,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
*/
static void process_instruction(struct nqssadce_state* s)
{
- struct prog_instruction *inst = s->Program->Instructions + s->IP;
+ struct prog_instruction *inst = &s->IP->I;
+ GLuint WriteMask;
if (inst->Opcode == OPCODE_END)
return;
@@ -164,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s)
if (inst->Opcode != OPCODE_KIL) {
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
- _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+ rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
@@ -173,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s)
regstate->Sourced &= ~inst->DstReg.WriteMask;
if (inst->DstReg.WriteMask == 0) {
- _mesa_delete_instructions(s->Program, s->IP, 1);
+ struct rc_instruction * inst_remove = s->IP;
+ s->IP = s->IP->Prev;
+ rc_remove_instruction(inst_remove);
return;
}
@@ -181,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s)
unalias_temporary(s, inst->DstReg.Index);
}
- /* Attention: Due to swizzle emulation code, the following
- * might change the instruction stream under us, so we have
- * to be careful with the inst pointer. */
+ WriteMask = inst->DstReg.WriteMask;
+
switch (inst->Opcode) {
case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
case OPCODE_MOV:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
break;
case OPCODE_ADD:
case OPCODE_MAX:
@@ -198,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MUL:
case OPCODE_SGE:
case OPCODE_SLT:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
break;
case OPCODE_CMP:
case OPCODE_MAD:
- inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
- inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+ track_used_srcreg(s, 0, WriteMask);
+ track_used_srcreg(s, 1, WriteMask);
+ track_used_srcreg(s, 2, WriteMask);
break;
case OPCODE_COS:
case OPCODE_EX2:
@@ -213,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- inst = track_used_srcreg(s, inst, 0, 0x1);
+ track_used_srcreg(s, 0, 0x1);
break;
case OPCODE_DP3:
- inst = track_used_srcreg(s, inst, 0, 0x7);
- inst = track_used_srcreg(s, inst, 1, 0x7);
+ track_used_srcreg(s, 0, 0x7);
+ track_used_srcreg(s, 1, 0x7);
break;
case OPCODE_DP4:
- inst = track_used_srcreg(s, inst, 0, 0xf);
- inst = track_used_srcreg(s, inst, 1, 0xf);
+ track_used_srcreg(s, 0, 0xf);
+ track_used_srcreg(s, 1, 0xf);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
- inst = track_used_srcreg(s, inst, 0, 0xf);
+ track_used_srcreg(s, 0, 0xf);
break;
case OPCODE_DST:
- inst = track_used_srcreg(s, inst, 0, 0x6);
- inst = track_used_srcreg(s, inst, 1, 0xa);
+ track_used_srcreg(s, 0, 0x6);
+ track_used_srcreg(s, 1, 0xa);
break;
case OPCODE_EXP:
case OPCODE_LOG:
case OPCODE_POW:
- inst = track_used_srcreg(s, inst, 0, 0x3);
+ track_used_srcreg(s, 0, 0x3);
break;
case OPCODE_LIT:
- inst = track_used_srcreg(s, inst, 0, 0xb);
+ track_used_srcreg(s, 0, 0xb);
break;
default:
- _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+ rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
+
+ s->IP = s->IP->Prev;
}
-static void calculateInputsOutputs(struct gl_program *p)
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
{
- struct prog_instruction *inst;
- GLuint InputsRead, OutputsWritten;
+ struct rc_instruction *inst;
- inst = p->Instructions;
- InputsRead = 0;
- OutputsWritten = 0;
- while (inst->Opcode != OPCODE_END)
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
{
- int i, num_src_regs;
+ int i;
+ int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
- num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
for (i = 0; i < num_src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT)
- InputsRead |= 1 << inst->SrcReg[i].Index;
+ if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
+ c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
}
- if (inst->DstReg.File == PROGRAM_OUTPUT)
- OutputsWritten |= 1 << inst->DstReg.Index;
-
- ++inst;
+ if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
+ if (inst->I.DstReg.File == PROGRAM_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
+ }
}
-
- p->InputsRead = InputsRead;
- p->OutputsWritten = OutputsWritten;
}
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = p;
+ s.Compiler = c;
s.Descr = descr;
+ s.UserData = data;
s.Descr->Init(&s);
- s.IP = p->NumInstructions;
+ s.IP = c->Program.Instructions.Prev;
- while(s.IP > 0) {
- s.IP--;
+ while(s.IP != &c->Program.Instructions && !c->Error)
process_instruction(&s);
- }
- calculateInputsOutputs(p);
+ rc_calculate_inputs_outputs(c);
}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
index 8626f21c25e..b3fc77a35a6 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
@@ -30,7 +30,6 @@
#include "radeon_program.h"
-
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
@@ -44,14 +43,13 @@ struct register_state {
* read from, etc.
*/
struct nqssadce_state {
- GLcontext *Ctx;
- struct gl_program *Program;
+ struct radeon_compiler *Compiler;
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
- int IP;
+ struct rc_instruction * IP;
/**
* Which registers are read by subsequent instructions?
@@ -59,6 +57,8 @@ struct nqssadce_state {
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
struct register_state Address;
+
+ void * UserData;
};
@@ -83,11 +83,9 @@ struct radeon_nqssadce_descr {
* The transformation will work recursively on the emitted instruction(s).
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
- void *Data;
};
-void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
new file mode 100644
index 00000000000..208d3b90c83
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include "radeon_compiler.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ struct radeon_compiler * c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * current = inst;
+ int i;
+
+ inst = inst->Next;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(c, current, t->userData))
+ break;
+ }
+ }
+}
+
+
+GLint rc_find_free_temporary(struct radeon_compiler * c)
+{
+ GLboolean used[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ memset(used, 0, sizeof(used));
+
+ for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+ const struct prog_instruction *inst = &rcinst->I;
+ const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint k;
+
+ for (k = 0; k < n; k++) {
+ if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
+ used[inst->SrcReg[k].Index] = GL_TRUE;
+ }
+ }
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ return -1;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+ inst->Prev = 0;
+ inst->Next = 0;
+
+ _mesa_init_instructions(&inst->I, 1);
+
+ return inst;
+}
+
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ inst->Prev = after;
+ inst->Next = after->Next;
+
+ inst->Prev->Next = inst;
+ inst->Next->Prev = inst;
+
+ return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+ inst->Prev->Next = inst->Next;
+ inst->Next->Prev = inst->Prev;
+}
+
+
+void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+ struct prog_instruction *source;
+ unsigned int i;
+
+ for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
+ struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ dest->I = *source;
+ }
+
+ c->Program.ShadowSamplers = program->ShadowSamplers;
+ c->Program.InputsRead = program->InputsRead;
+ c->Program.OutputsWritten = program->OutputsWritten;
+
+ for(i = 0; i < program->Parameters->NumParameters; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+}
+
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ GLuint indent = 0;
+ GLuint linenum = 1;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ /* Massive hack: We rely on the fact that the printers do not actually
+ * use the gl_program argument (last argument) in debug mode */
+ indent = _mesa_fprint_instruction_opt(
+ stderr, &inst->I,
+ indent, PROG_PRINT_DEBUG, 0);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 88474d43a22..561958608ca 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -34,12 +34,9 @@
#include "shader/program.h"
#include "shader/prog_instruction.h"
-
-enum {
- CLAUSE_MIXED = 0,
- CLAUSE_ALU,
- CLAUSE_TEX
-};
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_program;
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
@@ -83,19 +80,12 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz)
return ret;
}
+static INLINE void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
-/**
- * Transformation context that is passed to local transformations.
- *
- * Care must be taken with some operations during transformation,
- * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
- */
-struct radeon_transform_context {
- GLcontext *Ctx;
- struct gl_program *Program;
- struct prog_instruction *OldInstructions;
- GLuint OldNumInstructions;
-};
/**
* A transformation that can be passed to \ref radeonLocalTransform.
@@ -109,23 +99,23 @@ struct radeon_transform_context {
*/
struct radeon_program_transformation {
GLboolean (*function)(
- struct radeon_transform_context*,
- struct prog_instruction*,
+ struct radeon_compiler*,
+ struct rc_instruction*,
void*);
void *userData;
};
void radeonLocalTransform(
- GLcontext* ctx,
- struct gl_program *program,
+ struct radeon_compiler *c,
int num_transformations,
struct radeon_program_transformation* transformations);
-/**
- * Find a usable free temporary register during program transformation
- */
-GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
+GLint rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_remove_instruction(struct rc_instruction * inst);
-struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
+void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 8283723bad7..609e510ff2b 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -35,49 +35,52 @@
#include "radeon_program_alu.h"
-#include "shader/prog_parameter.h"
+#include "radeon_compiler.h"
-static struct prog_instruction *emit1(struct gl_program* p,
+static struct rc_instruction *emit1(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg;
return fpi;
}
-static struct prog_instruction *emit2(struct gl_program* p,
+static struct rc_instruction *emit2(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
return fpi;
}
-static struct prog_instruction *emit3(struct gl_program* p,
+static struct rc_instruction *emit3(
+ struct radeon_compiler * c, struct rc_instruction * after,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
struct prog_src_register SrcReg2)
{
- struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
-
- fpi->Opcode = Opcode;
- fpi->SaturateMode = Saturate;
- fpi->DstReg = DstReg;
- fpi->SrcReg[0] = SrcReg0;
- fpi->SrcReg[1] = SrcReg1;
- fpi->SrcReg[2] = SrcReg2;
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->I.Opcode = Opcode;
+ fpi->I.SaturateMode = Saturate;
+ fpi->I.DstReg = DstReg;
+ fpi->I.SrcReg[0] = SrcReg0;
+ fpi->I.SrcReg[1] = SrcReg1;
+ fpi->I.SrcReg[2] = SrcReg2;
return fpi;
}
@@ -171,44 +174,63 @@ static struct prog_src_register scalar(struct prog_src_register reg)
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
-static void transform_ABS(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src = inst->SrcReg[0];
+ struct prog_src_register src = inst->I.SrcReg[0];
src.Abs = 1;
src.Negate = NEGATE_NONE;
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
+ rc_remove_instruction(inst);
}
-static void transform_DPH(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src0 = inst->I.SrcReg[0];
+ struct prog_src_register src1 = inst->I.SrcReg[1];
+ src0.Negate &= ~NEGATE_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~NEGATE_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct prog_src_register src0 = inst->I.SrcReg[0];
src0.Negate &= ~NEGATE_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
+ emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
+ rc_remove_instruction(inst);
}
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*/
-static void transform_DST(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_DST(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ rc_remove_instruction(inst);
}
-static void transform_FLR(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_FLR(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
- emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
+ emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
+ inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ rc_remove_instruction(inst);
}
/**
@@ -229,152 +251,159 @@ static void transform_FLR(struct radeon_transform_context* t,
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*/
-static void transform_LIT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- static const GLfloat LitConst[4] = { -127.999999 };
-
GLuint constant;
GLuint constant_swizzle;
GLuint temp;
- int needTemporary = 0;
struct prog_src_register srctemp;
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
- if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
- needTemporary = 1;
- } else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
- // LIT is typically followed by DP3/DP4, so there's no point
- // in creating special code for this case
- needTemporary = 1;
- }
+ if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ struct rc_instruction * inst_mov;
- if (needTemporary) {
- temp = radeonFindFreeTemporary(t);
- } else {
- temp = inst->DstReg.Index;
+ inst_mov = emit1(c, inst,
+ OPCODE_MOV, 0, inst->I.DstReg,
+ srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
+
+ inst->I.DstReg.File = PROGRAM_TEMPORARY;
+ inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
+ inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
}
+
+ temp = inst->I.DstReg.Index;
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
- emit2(t->Program, OPCODE_MAX, 0,
+ emit2(c, inst->Prev, OPCODE_MAX, 0,
dstregtmpmask(temp, WRITEMASK_XYW),
- inst->SrcReg[0],
+ inst->I.SrcReg[0],
swizzle(srcreg(PROGRAM_CONSTANT, constant),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(t->Program, OPCODE_MIN, 0,
+ emit2(c, inst->Prev, OPCODE_MIN, 0,
dstregtmpmask(temp, WRITEMASK_Z),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.w)
- emit1(t->Program, OPCODE_LG2, 0,
+ emit1(c, inst->Prev, OPCODE_LG2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(t->Program, OPCODE_MUL, 0,
+ emit2(c, inst->Prev, OPCODE_MUL, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(t->Program, OPCODE_EX2, 0,
+ emit1(c, inst->Prev, OPCODE_EX2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_Z),
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
+ emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
dstregtmpmask(temp, WRITEMASK_XYW),
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
- if (needTemporary)
- emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
+ rc_remove_instruction(inst);
}
-static void transform_LRP(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_LRP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0,
+ emit2(c, inst->Prev, OPCODE_ADD, 0,
dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->SrcReg[1], negate(inst->SrcReg[2]));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
- inst->DstReg,
- inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+ inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
+ inst->I.DstReg,
+ inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
+
+ rc_remove_instruction(inst);
}
-static void transform_POW(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_POW(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
- emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
- emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
- emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
+ emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
+ emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
+ emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
}
-static void transform_RSQ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_RSQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
+ inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
}
-static void transform_SGE(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SGE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
}
-static void transform_SLT(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SLT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
+ int tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
- emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
+ emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
+ emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
}
-static void transform_SUB(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SUB(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+ inst->I.Opcode = OPCODE_ADD;
+ inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
}
-static void transform_SWZ(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_SWZ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
+ inst->I.Opcode = OPCODE_MOV;
}
-static void transform_XPD(struct radeon_transform_context* t,
- struct prog_instruction* inst)
+static void transform_XPD(struct radeon_compiler* c,
+ struct rc_instruction* inst)
{
- int tempreg = radeonFindFreeTemporary(t);
-
- emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
- swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
+ emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
+ swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
+ swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+
+ rc_remove_instruction(inst);
}
@@ -392,31 +421,64 @@ static void transform_XPD(struct radeon_transform_context* t,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
-GLboolean radeonTransformALU(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
void* unused)
{
- switch(inst->Opcode) {
- case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
+ case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
+ case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
+ case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
-static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Note: r500 can take absolute values, but r300 cannot. */
+ inst->I.Opcode = OPCODE_MAX;
+ inst->I.SrcReg[1] = inst->I.SrcReg[0];
+ inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
+}
+
+/**
+ * For use with radeonLocalTransform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+GLboolean r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->I.Opcode) {
+ case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
+ case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
+ case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
+ case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
+ case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
+ case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
+ case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
+ case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
+static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
{
@@ -434,11 +496,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
};
int i;
- for(i = 0; i < 2; ++i) {
- GLuint swz;
- constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
- ASSERT(swz == SWIZZLE_NOOP);
- }
+ for(i = 0; i < 2; ++i)
+ constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
}
/**
@@ -449,23 +508,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
-static void sin_approx(struct radeon_transform_context* t,
+static void sin_approx(
+ struct radeon_compiler* c, struct rc_instruction * after,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
+ emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
+ emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(t->Program, OPCODE_MAD, 0, dst,
+ emit3(c, after->Prev, OPCODE_MAD, 0, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
@@ -476,78 +536,80 @@ static void sin_approx(struct radeon_transform_context* t,
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
-GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
- GLuint tempreg = radeonFindFreeTemporary(t);
+ GLuint tempreg = rc_find_free_temporary(c);
- sincos_constants(t, constants);
+ sincos_constants(c, constants);
- if (inst->Opcode == OPCODE_COS) {
+ if (inst->I.Opcode == OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
- } else if (inst->Opcode == OPCODE_SIN) {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- sin_approx(t, inst->DstReg,
+ sin_approx(c, inst->Prev, inst->I.DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
+ emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
- struct prog_dst_register dst = inst->DstReg;
+ struct prog_dst_register dst = inst->I.DstReg;
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
+ sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
- dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
- sin_approx(t, dst,
+ dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
+ sin_approx(c, inst->Prev, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -560,50 +622,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
-GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_COS &&
- inst->Opcode != OPCODE_SIN &&
- inst->Opcode != OPCODE_SCS)
+ if (inst->I.Opcode != OPCODE_COS &&
+ inst->I.Opcode != OPCODE_SIN &&
+ inst->I.Opcode != OPCODE_SCS)
return GL_FALSE;
- static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
+ static const GLfloat RCP_2PI = 0.15915494309189535;
GLuint temp;
GLuint constant;
GLuint constant_swizzle;
- temp = radeonFindFreeTemporary(t);
- constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
- emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
+ swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
+ emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
srcreg(PROGRAM_TEMPORARY, temp));
- if (inst->Opcode == OPCODE_COS) {
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
+ if (inst->I.Opcode == OPCODE_COS) {
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SIN) {
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
- inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->DstReg;
+ } else if (inst->I.Opcode == OPCODE_SIN) {
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
+ inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ } else if (inst->I.Opcode == OPCODE_SCS) {
+ struct prog_dst_register moddst = inst->I.DstReg;
- if (inst->DstReg.WriteMask & WRITEMASK_X) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
moddst.WriteMask = WRITEMASK_X;
- emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
- if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+ if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
moddst.WriteMask = WRITEMASK_Y;
- emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
+ emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
}
+ rc_remove_instruction(inst);
+
return GL_TRUE;
}
@@ -615,21 +679,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
* @warning This explicitly changes the form of DDX and DDY!
*/
-GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
- struct prog_instruction* inst,
+GLboolean radeonTransformDeriv(struct radeon_compiler* c,
+ struct rc_instruction* inst,
void* unused)
{
- if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
+ if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
return GL_FALSE;
- struct prog_src_register B = inst->SrcReg[1];
-
- B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
- SWIZZLE_ONE, SWIZZLE_ONE);
- B.Negate = NEGATE_XYZW;
-
- emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
- inst->SrcReg[0], B);
+ inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
+ inst->I.SrcReg[1].Negate = NEGATE_XYZW;
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index b45958115cf..147efec6fcb 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -31,23 +31,28 @@
#include "radeon_program.h"
GLboolean radeonTransformALU(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+GLboolean r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigSimple(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformTrigScale(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
GLboolean radeonTransformDeriv(
- struct radeon_transform_context *t,
- struct prog_instruction*,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index d6fb474cf23..48616ac461c 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -35,17 +35,19 @@
#include "radeon_program_pair.h"
-#include "radeon_common.h"
-
+#include "memory_pool.h"
+#include "radeon_compiler.h"
#include "shader/prog_print.h"
#define error(fmt, args...) do { \
- _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
+ rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
- s->Error = GL_TRUE; \
} while(0)
struct pair_state_instruction {
+ struct prog_instruction Instruction;
+ GLuint IP; /**< Position of this instruction in original program */
+
GLuint IsTex:1; /**< Is a texture instruction */
GLuint NeedRGB:1; /**< Needs the RGB ALU */
GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
@@ -73,7 +75,7 @@ struct pair_state_instruction {
* Used to keep track of which instructions read a value.
*/
struct reg_value_reader {
- GLuint IP; /**< IP of the instruction that performs this access */
+ struct pair_state_instruction *Reader;
struct reg_value_reader *Next;
};
@@ -82,7 +84,7 @@ struct reg_value_reader {
* PROGRAM_TEMPORARY.
*/
struct reg_value {
- GLuint IP; /**< IP of the instruction that writes this value */
+ struct pair_state_instruction *Writer;
struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
/**
@@ -116,11 +118,8 @@ struct pair_register_translation {
};
struct pair_state {
- GLcontext *Ctx;
- struct gl_program *Program;
+ struct r300_fragment_program_compiler * Compiler;
const struct radeon_pair_handler *Handler;
- GLboolean Error;
- GLboolean Debug;
GLboolean Verbose;
void *UserData;
@@ -130,11 +129,6 @@ struct pair_state {
struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
- /**
- * Derived information about program instructions.
- */
- struct pair_state_instruction *Instructions;
-
struct {
GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
} HwTemps[128];
@@ -147,14 +141,6 @@ struct pair_state {
struct pair_state_instruction *ReadyRGB;
struct pair_state_instruction *ReadyAlpha;
struct pair_state_instruction *ReadyTEX;
-
- /**
- * Pool of @ref reg_value structures for fast allocation.
- */
- struct reg_value *ValuePool;
- GLuint ValuePoolUsed;
- struct reg_value_reader *ReaderPool;
- GLuint ReaderPoolUsed;
};
@@ -183,7 +169,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
struct pair_register_translation *t = get_register(s, file, index);
if (!t) {
- _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
+ error("get_hw_reg: %i[%i]\n", file, index);
return 0;
}
@@ -221,15 +207,13 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa
}
/**
- * The instruction at the given IP has become ready. Link it into the ready
+ * The given instruction has become ready. Link it into the ready
* instructions.
*/
-static void instruction_ready(struct pair_state *s, int ip)
+static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
-
if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", ip);
+ _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
if (pairinst->IsTex)
add_pairinst_to_list(&s->ReadyTEX, pairinst);
@@ -296,12 +280,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
* Classify an instruction according to which ALUs etc. it needs
*/
static void classify_instruction(struct pair_state *s,
- struct prog_instruction *inst, struct pair_state_instruction *pairinst)
+ struct pair_state_instruction *psi)
{
- pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
+ psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
+ psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
- switch(inst->Opcode) {
+ switch(psi->Instruction.Opcode) {
case OPCODE_ADD:
case OPCODE_CMP:
case OPCODE_DDX:
@@ -319,24 +303,24 @@ static void classify_instruction(struct pair_state *s,
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
- pairinst->IsTranscendent = 1;
- pairinst->NeedAlpha = 1;
+ psi->IsTranscendent = 1;
+ psi->NeedAlpha = 1;
break;
case OPCODE_DP4:
- pairinst->NeedAlpha = 1;
+ psi->NeedAlpha = 1;
/* fall through */
case OPCODE_DP3:
- pairinst->NeedRGB = 1;
+ psi->NeedRGB = 1;
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
case OPCODE_END:
- pairinst->IsTex = 1;
+ psi->IsTex = 1;
break;
default:
- error("Unknown opcode %d\n", inst->Opcode);
+ error("Unknown opcode %d\n", psi->Instruction.Opcode);
break;
}
}
@@ -348,30 +332,34 @@ static void classify_instruction(struct pair_state *s,
*/
static void scan_instructions(struct pair_state *s)
{
- struct prog_instruction *inst;
- struct pair_state_instruction *pairinst;
+ struct rc_instruction *source;
GLuint ip;
- for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
- inst->Opcode != OPCODE_END;
- ++inst, ++pairinst, ++ip) {
- final_rewrite(s, inst);
- classify_instruction(s, inst, pairinst);
+ for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0;
+ source != &s->Compiler->Base.Program.Instructions;
+ source = source->Next, ++ip) {
+ struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst));
+ memset(pairinst, 0, sizeof(struct pair_state_instruction));
+
+ pairinst->Instruction = source->I;
+ pairinst->IP = ip;
+ final_rewrite(s, &pairinst->Instruction);
+ classify_instruction(s, pairinst);
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+ int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
int j;
for(j = 0; j < nsrc; j++) {
struct pair_register_translation *t =
- get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
+ get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
if (!t)
continue;
t->RefCount++;
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
int i;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
+ GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
if (swz >= 4)
continue; /* constant or NIL swizzle */
if (!t->Value[swz])
@@ -381,36 +369,37 @@ static void scan_instructions(struct pair_state *s)
* also rewrites the value. The code below adds
* a dependency for the DstReg, which is a superset
* of the SrcReg dependency. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[j].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
+ pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
+ GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
continue;
- struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
+ struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r));
pairinst->NumDependencies++;
t->Value[swz]->NumReaders++;
- r->IP = ip;
+ r->Reader = pairinst;
r->Next = t->Value[swz]->Readers;
t->Value[swz]->Readers = r;
}
}
}
- int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
+ int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
if (ndst) {
struct pair_register_translation *t =
- get_register(s, inst->DstReg.File, inst->DstReg.Index);
+ get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
if (t) {
t->RefCount++;
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
int j;
for(j = 0; j < 4; ++j) {
- if (!GET_BIT(inst->DstReg.WriteMask, j))
+ if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
continue;
- struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
- v->IP = ip;
+ struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v));
+ memset(v, 0, sizeof(struct reg_value));
+ v->Writer = pairinst;
if (t->Value[j]) {
pairinst->NumDependencies++;
t->Value[j]->Next = v;
@@ -426,7 +415,7 @@ static void scan_instructions(struct pair_state *s)
_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
if (!pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/* Clear the PROGRAM_TEMPORARY state */
@@ -438,70 +427,23 @@ static void scan_instructions(struct pair_state *s)
}
-/**
- * Reserve hardware temporary registers for the program inputs.
- *
- * @note This allocation is performed explicitly, because the order of inputs
- * is determined by the RS hardware.
- */
-static void allocate_input_registers(struct pair_state *s)
-{
- GLuint InputsRead = s->Program->InputsRead;
- int i;
- GLuint hwindex = 0;
-
- /* Primary colour */
- if (InputsRead & FRAG_BIT_COL0)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
- InputsRead &= ~FRAG_BIT_COL0;
-
- /* Secondary color */
- if (InputsRead & FRAG_BIT_COL1)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
- InputsRead &= ~FRAG_BIT_COL1;
-
- /* Texcoords */
- for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i))
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
- }
- InputsRead &= ~FRAG_BITS_TEX_ANY;
-
- /* Fogcoords treated as a texcoord */
- if (InputsRead & FRAG_BIT_FOGC)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
- InputsRead &= ~FRAG_BIT_FOGC;
-
- /* fragment position treated as a texcoord */
- if (InputsRead & FRAG_BIT_WPOS)
- alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
- InputsRead &= ~FRAG_BIT_WPOS;
-
- /* Anything else */
- if (InputsRead)
- error("Don't know how to handle inputs 0x%x\n", InputsRead);
-}
-
-
-static void decrement_dependencies(struct pair_state *s, int ip)
+static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
ASSERT(pairinst->NumDependencies > 0);
if (!--pairinst->NumDependencies)
- instruction_ready(s, ip);
+ instruction_ready(s, pairinst);
}
/**
* Update the dependency tracking state based on what the instruction
* at the given IP does.
*/
-static void commit_instruction(struct pair_state *s, int ip)
+static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
{
- struct prog_instruction *inst = s->Program->Instructions + ip;
- struct pair_state_instruction *pairinst = s->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", ip);
+ _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
@@ -516,11 +458,11 @@ static void commit_instruction(struct pair_state *s, int ip)
if (t->Value[i]->NumReaders) {
struct reg_value_reader *r;
for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->IP);
+ decrement_dependencies(s, r->Reader);
} else if (t->Value[i]->Next) {
/* This happens when the only reader writes
* the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->IP);
+ decrement_dependencies(s, t->Value[i]->Next->Writer);
}
}
}
@@ -554,7 +496,7 @@ static void commit_instruction(struct pair_state *s, int ip)
if (!--t->Value[swz]->NumReaders) {
if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->IP);
+ decrement_dependencies(s, t->Value[swz]->Next->Writer);
}
}
}
@@ -585,36 +527,52 @@ static void emit_all_tex(struct pair_state *s)
// Allocate destination hardware registers in one block to avoid conflicts.
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->Opcode != OPCODE_KIL)
get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" BEGIN_TEX\n");
if (s->Handler->BeginTexBlock)
- s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData);
for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- int ip = pairinst - s->Instructions;
- struct prog_instruction *inst = s->Program->Instructions + ip;
- commit_instruction(s, ip);
+ struct prog_instruction *inst = &pairinst->Instruction;
+ commit_instruction(s, pairinst);
if (inst->Opcode != OPCODE_KIL)
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
- if (s->Debug) {
+ if (s->Compiler->Base.Debug) {
_mesa_printf(" ");
_mesa_print_instruction(inst);
fflush(stdout);
}
- s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
+
+ struct radeon_pair_texture_instruction rpti;
+
+ switch(inst->Opcode) {
+ case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
+ case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
+ case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
+ default:
+ case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
+ }
+
+ rpti.DestIndex = inst->DstReg.Index;
+ rpti.WriteMask = inst->DstReg.WriteMask;
+ rpti.TexSrcUnit = inst->TexSrcUnit;
+ rpti.TexSrcTarget = inst->TexSrcTarget;
+ rpti.SrcIndex = inst->SrcReg[0].Index;
+ rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
+
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti);
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
_mesa_printf(" END_TEX\n");
}
@@ -637,7 +595,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
index = get_hw_reg(s, src.File, src.Index);
} else {
constant = 1;
- s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
+ index = src.Index;
}
for(i = 0; i < 3; ++i) {
@@ -684,10 +642,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
* Fill the given ALU instruction's opcodes and source operands into the given pair,
* if possible.
*/
-static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static GLboolean fill_instruction_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
@@ -768,16 +728,18 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_
* we are absolutely certain that we're going to emit a certain
* instruction pairing.
*/
-static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
+static void fill_dest_into_pair(
+ struct pair_state *s,
+ struct radeon_pair_instruction *pair,
+ struct pair_state_instruction *pairinst)
{
- struct pair_state_instruction *pairinst = s->Instructions + ip;
- struct prog_instruction *inst = s->Program->Instructions + ip;
+ struct prog_instruction *inst = &pairinst->Instruction;
if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
+ if (inst->DstReg.Index == s->Compiler->OutputColor) {
pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
+ } else if (inst->DstReg.Index == s->Compiler->OutputDepth) {
pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
}
} else {
@@ -804,24 +766,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc
static void emit_alu(struct pair_state *s)
{
struct radeon_pair_instruction pair;
+ struct pair_state_instruction *psi;
if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- int ip;
if (s->ReadyFullALU) {
- ip = s->ReadyFullALU - s->Instructions;
+ psi = s->ReadyFullALU;
s->ReadyFullALU = s->ReadyFullALU->NextReady;
} else if (s->ReadyRGB) {
- ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
} else {
- ip = s->ReadyAlpha - s->Instructions;
+ psi = s->ReadyAlpha;
s->ReadyAlpha = s->ReadyAlpha->NextReady;
}
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
} else {
struct pair_state_instruction **prgb;
struct pair_state_instruction **palpha;
@@ -830,65 +792,65 @@ static void emit_alu(struct pair_state *s)
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- int rgbip = *prgb - s->Instructions;
- int alphaip = *palpha - s->Instructions;
+ struct pair_state_instruction * psirgb = *prgb;
+ struct pair_state_instruction * psialpha = *palpha;
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, rgbip);
- if (!fill_instruction_into_pair(s, &pair, alphaip))
+ fill_instruction_into_pair(s, &pair, psirgb);
+ if (!fill_instruction_into_pair(s, &pair, psialpha))
continue;
*prgb = (*prgb)->NextReady;
*palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, rgbip);
- fill_dest_into_pair(s, &pair, alphaip);
- commit_instruction(s, rgbip);
- commit_instruction(s, alphaip);
+ fill_dest_into_pair(s, &pair, psirgb);
+ fill_dest_into_pair(s, &pair, psialpha);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
goto success;
}
}
/* No success in pairing; just take the first RGB instruction */
- int ip = s->ReadyRGB - s->Instructions;
+ psi = s->ReadyRGB;
s->ReadyRGB = s->ReadyRGB->NextReady;
+
_mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, ip);
- fill_dest_into_pair(s, &pair, ip);
- commit_instruction(s, ip);
+ fill_instruction_into_pair(s, &pair, psi);
+ fill_dest_into_pair(s, &pair, psi);
+ commit_instruction(s, psi);
success: ;
}
- if (s->Debug)
+ if (s->Compiler->Base.Debug)
radeonPrintPairInstruction(&pair);
- s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
+ s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
+/* Callback function for assigning input registers to hardware registers */
+static void alloc_helper(void * data, unsigned input, unsigned hwreg)
+{
+ struct pair_state * s = data;
+ alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg);
+}
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+void radeonPairProgram(
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler* handler, void *userdata)
{
struct pair_state s;
_mesa_bzero(&s, sizeof(s));
- s.Ctx = ctx;
- s.Program = _mesa_clone_program(ctx, program);
+ s.Compiler = compiler;
s.Handler = handler;
s.UserData = userdata;
- s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
- s.Verbose = GL_FALSE && s.Debug;
+ s.Verbose = GL_FALSE && s.Compiler->Base.Debug;
- s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
- sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
- s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
- s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
- sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
-
- if (s.Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf("Emit paired program\n");
scan_instructions(&s);
- allocate_input_registers(&s);
+ s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s);
- while(!s.Error &&
+ while(!s.Compiler->Base.Error &&
(s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
if (s.ReadyTEX)
emit_all_tex(&s);
@@ -897,16 +859,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
emit_alu(&s);
}
- if (s.Debug)
+ if (s.Compiler->Base.Debug)
_mesa_printf(" END\n");
-
- _mesa_free(s.Instructions);
- _mesa_free(s.ValuePool);
- _mesa_free(s.ReaderPool);
-
- _mesa_reference_program(ctx, &s.Program, NULL);
-
- return !s.Error;
}
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 4624a246298..ff761785518 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -30,6 +30,8 @@
#include "radeon_program.h"
+struct r300_fragment_program_compiler;
+
/**
* Represents a paired instruction, as found in R300 and R500
@@ -82,18 +84,32 @@ struct radeon_pair_instruction {
};
+enum {
+ RADEON_OPCODE_TEX = 0,
+ RADEON_OPCODE_TXB,
+ RADEON_OPCODE_TXP,
+ RADEON_OPCODE_KIL
+};
+
+struct radeon_pair_texture_instruction {
+ GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */
+
+ GLuint DestIndex:8;
+ GLuint WriteMask:4;
+
+ GLuint TexSrcUnit:5;
+ GLuint TexSrcTarget:3;
+
+ GLuint SrcIndex:8;
+ GLuint SrcSwizzle:12;
+};
+
+
/**
*
*/
struct radeon_pair_handler {
/**
- * Fill in the proper hardware index for the given constant register.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
-
- /**
* Write a paired instruction to the hardware.
*
* @return GL_FALSE on error.
@@ -107,7 +123,7 @@ struct radeon_pair_handler {
*
* @return GL_FALSE on error.
*/
- GLboolean (*EmitTex)(void*, struct prog_instruction*);
+ GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*);
/**
* Called before a block of contiguous, independent texture
@@ -115,10 +131,11 @@ struct radeon_pair_handler {
*/
GLboolean (*BeginTexBlock)(void*);
- GLuint MaxHwTemps;
+ unsigned MaxHwTemps;
};
-GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
+void radeonPairProgram(
+ struct r300_fragment_program_compiler * compiler,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index af535037d06..bd46f9acf2e 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -164,47 +164,46 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
- int notexture = 0;
-
- if (numtmus) {
- int i;
-
- for(i = 0; i < numtmus; ++i) {
- radeonTexObj *t = r300->hw.textures[i];
-
- if (!t)
- notexture = 1;
- }
-
- if (r300->radeon.radeonScreen->kernel_mm && notexture) {
- return;
- }
- for(i = 0; i < numtmus; ++i) {
- radeonTexObj *t = r300->hw.textures[i];
- if (t && !t->image_override) {
- BEGIN_BATCH_NO_AUTOSTATE(4);
- OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
- RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
- END_BATCH();
- } else if (!t) {
- /* Texture unit hasn't a texture bound nothings to do */
- } else { /* override cases */
- if (t->bo) {
- BEGIN_BATCH_NO_AUTOSTATE(4);
- OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
- RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
- END_BATCH();
- } else if (!r300->radeon.radeonScreen->kernel_mm) {
- BEGIN_BATCH_NO_AUTOSTATE(2);
- OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
- OUT_BATCH(t->override_offset);
- END_BATCH();
- } else {
- /* Texture unit hasn't a texture bound nothings to do */
- }
- }
+ int i;
+
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (t && !t->image_override) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!t) {
+ /* Texture unit hasn't a texture bound.
+ * We assign the current color buffer as a fakery to make
+ * KIL work on KMS (without it, the CS checker will complain).
+ */
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (rrb && rrb->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ }
+ }
+ } else { /* override cases */
+ if (t->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!r300->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH(t->override_offset);
+ END_BATCH();
+ } else {
+ /* Texture unit hasn't a texture bound nothings to do */
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 6f3aab986d2..91fa77a1690 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -64,9 +64,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_ioctl.h"
#include "r300_tex.h"
#include "r300_emit.h"
-#include "r300_render.h"
+#include "r300_queryobj.h"
#include "r300_swtcl.h"
#include "radeon_bocs_wrapper.h"
+#include "radeon_buffer_objects.h"
#include "vblank.h"
@@ -74,6 +75,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "xmlpool.h" /* for symbolic values of enum-type options */
#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_equation_separate
@@ -94,6 +96,7 @@ const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{"GL_ARB_multitexture", NULL},
{"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
{"GL_ARB_shadow", NULL},
@@ -154,7 +157,6 @@ const struct dri_extension gl_20_extension[] = {
};
static const struct tnl_pipeline_stage *r300_pipeline[] = {
-
/* Catch any t&l fallbacks
*/
&_tnl_vertex_transform_stage,
@@ -165,21 +167,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
&_tnl_texture_transform_stage,
&_tnl_point_attenuation_stage,
&_tnl_vertex_program_stage,
-
- /* Try again to go to tcl?
- * - no good for asymmetric-twoside (do with multipass)
- * - no good for asymmetric-unfilled (do with multipass)
- * - good for material
- * - good for texgen
- * - need to manipulate a bit of state
- *
- * - worth it/not worth it?
- */
-
- /* Else do them here.
- */
- &_r300_render_stage,
- &_tnl_render_stage, /* FALLBACK */
+ &_tnl_render_stage,
0,
};
@@ -325,6 +313,11 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
}
+
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530)
+ r300->num_z_pipes = 2;
+ else
+ r300->num_z_pipes = r300->radeon.radeonScreen->num_gb_pipes;
}
static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen)
@@ -367,6 +360,11 @@ static void r300InitGLExtensions(GLcontext *ctx)
} else if (r300->options.s3tc_force_disabled) {
_mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
+
+ if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries ||
+ !r300->options.hw_tcl_enabled) {
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+ }
}
/* Create the device specific rendering context.
@@ -398,6 +396,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
r300InitStateFuncs(&functions);
r300InitTextureFuncs(&functions);
r300InitShaderFuncs(&functions);
+ r300InitQueryObjFunctions(&functions);
+ radeonInitBufferObjectFuncs(&functions);
if (!radeonInitContext(&r300->radeon, &functions,
glVisual, driContextPriv,
@@ -451,13 +451,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
r300InitState(r300);
r300InitShaderFunctions(r300);
- if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 ||
- screen->chip_family == CHIP_FAMILY_RS740) {
- r300->radeon.texture_row_align = 64;
- }
-
r300InitGLExtensions(ctx);
+ make_empty_list(&r300->query.not_flushed_head);
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index f7af7d4e574..3ba34266084 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
+#include "compiler/radeon_code.h"
struct r300_context;
typedef struct r300_context r300ContextRec;
@@ -389,46 +390,25 @@ struct r300_hw_state {
/* Vertex shader state */
-/* Perhaps more if we store programs in vmem? */
-/* drm_r300_cmd_header_t->vpu->count is unsigned char */
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-
-/* Can be tested with colormat currently. */
-#define VSF_MAX_FRAGMENT_TEMPS (14)
-
-#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
#define COLOR_IS_RGBA
#define TAG(x) r300##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
+struct r300_vertex_program_key {
+ GLbitfield FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
+};
+
struct r300_vertex_program {
struct gl_vertex_program *Base;
struct r300_vertex_program *next;
- struct r300_vertex_program_key {
- GLuint FpReads;
- GLuint FogAttr;
- GLuint WPosAttr;
- } key;
-
- struct r300_vertex_shader_hw_code {
- int length;
- union {
- GLuint d[VSF_MAX_FRAGMENT_LENGTH];
- float f[VSF_MAX_FRAGMENT_LENGTH];
- } body;
- } hw_code;
-
- GLboolean translated;
- GLboolean error;
+ struct r300_vertex_program_key key;
+ struct r300_vertex_program_code code;
- int pos_end;
- int num_temporaries; /* Number of temp vars used by program */
- int inputs[VERT_ATTRIB_MAX];
- int outputs[VERT_RESULT_MAX];
+ GLboolean error;
};
struct r300_vertex_program_cont {
@@ -441,131 +421,18 @@ struct r300_vertex_program_cont {
struct r300_vertex_program *progs;
};
-#define R300_PFS_MAX_ALU_INST 64
-#define R300_PFS_MAX_TEX_INST 32
-#define R300_PFS_MAX_TEX_INDIRECT 4
-#define R300_PFS_NUM_TEMP_REGS 32
-#define R300_PFS_NUM_CONST_REGS 32
-
-#define R500_PFS_MAX_INST 512
-#define R500_PFS_NUM_TEMP_REGS 128
-#define R500_PFS_NUM_CONST_REGS 256
-
-struct r300_pfs_compile_state;
-struct r500_pfs_compile_state;
-
-/**
- * Stores state that influences the compilation of a fragment program.
- */
-struct r300_fragment_program_external_state {
- struct {
- /**
- * If the sampler is used as a shadow sampler,
- * this field is:
- * 0 - GL_LUMINANCE
- * 1 - GL_INTENSITY
- * 2 - GL_ALPHA
- * depending on the depth texture mode.
- */
- GLuint depth_texture_mode : 2;
-
- /**
- * If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
- *
- * Otherwise, this field is 0.
- */
- GLuint texture_compare_func : 3;
- } unit[16];
-};
-
-
-struct r300_fragment_program_node {
- int tex_offset; /**< first tex instruction */
- int tex_end; /**< last tex instruction, relative to tex_offset */
- int alu_offset; /**< first ALU instruction */
- int alu_end; /**< last ALU instruction, relative to alu_offset */
- int flags;
-};
-
-/**
- * Stores an R300 fragment program in its compiled-to-hardware form.
- */
-struct r300_fragment_program_code {
- struct {
- int length; /**< total # of texture instructions used */
- GLuint inst[R300_PFS_MAX_TEX_INST];
- } tex;
-
- struct {
- int length; /**< total # of ALU instructions used */
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- } inst[R300_PFS_MAX_ALU_INST];
- } alu;
-
- struct r300_fragment_program_node node[4];
- int cur_node;
- int first_node_has_tex;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R300_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
-
-
-struct r500_fragment_program_code {
- struct {
- GLuint inst0;
- GLuint inst1;
- GLuint inst2;
- GLuint inst3;
- GLuint inst4;
- GLuint inst5;
- } inst[R500_PFS_MAX_INST];
-
- int inst_offset;
- int inst_end;
-
- /**
- * Remember which program register a given hardware constant
- * belongs to.
- */
- struct prog_src_register constant[R500_PFS_NUM_CONST_REGS];
- int const_nr;
-
- int max_temp_idx;
-};
/**
* Store everything about a fragment program that is needed
* to render with that program.
*/
struct r300_fragment_program {
- struct gl_program *Base;
-
- GLboolean translated;
GLboolean error;
-
+ struct r300_fragment_program *next;
struct r300_fragment_program_external_state state;
- union rX00_fragment_program_code {
- struct r300_fragment_program_code r300;
- struct r500_fragment_program_code r500;
- } code;
- GLboolean writes_depth;
- GLuint optimization;
-
- struct r300_fragment_program *next;
+ struct rX00_fragment_program_code code;
+ GLbitfield InputsRead;
/* attribute that we are sending the WPOS in */
gl_frag_attrib wpos_attr;
@@ -583,12 +450,6 @@ struct r300_fragment_program_cont {
struct r300_fragment_program *progs;
};
-struct r300_fragment_program_compiler {
- r300ContextPtr r300;
- struct r300_fragment_program *fp;
- union rX00_fragment_program_code *code;
- struct gl_program *program;
-};
#define R300_MAX_AOS_ARRAYS 16
@@ -610,8 +471,6 @@ struct r300_swtcl_info {
struct r300_vtable {
void (* SetupRSUnit)(GLcontext *ctx);
void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
- GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler);
- void (* FragmentProgramDump)(union rX00_fragment_program_code *code);
void (* SetupPixelShader)(GLcontext *ctx);
};
@@ -619,11 +478,12 @@ struct r300_vertex_buffer {
struct vertex_attribute {
/* generic */
GLubyte element;
- GLvoid *data;
- GLboolean free_needed;
GLuint stride;
GLuint dwords;
GLubyte size; /* number of components */
+ GLboolean is_named_bo;
+ struct radeon_bo *bo;
+ GLint bo_offset;
/* hw specific */
uint32_t data_type:4;
@@ -638,12 +498,23 @@ struct r300_vertex_buffer {
};
struct r300_index_buffer {
- GLvoid *ptr;
+ struct radeon_bo *bo;
+ int bo_offset;
+
GLboolean is_32bit;
- GLboolean free_needed;
GLuint count;
};
+struct r300_query_object {
+ struct gl_query_object Base;
+ struct radeon_bo *bo;
+ int curr_offset;
+ GLboolean emitted_begin;
+
+ /* Double linked list of not flushed query objects */
+ struct r300_query_object *prev, *next;
+};
+
/**
* \brief R300 context structure.
*/
@@ -669,7 +540,7 @@ struct r300_context {
uint32_t s3tc_force_disabled:1;
uint32_t stencil_two_side_disabled:1;
} options;
-
+
struct r300_swtcl_info swtcl;
struct r300_vertex_buffer vbuf;
struct r300_index_buffer ind_buf;
@@ -678,6 +549,13 @@ struct r300_context {
uint32_t fallback;
DECLARE_RENDERINPUTS(render_inputs_bitset);
+
+ struct {
+ struct r300_query_object *current;
+ struct r300_query_object not_flushed_head;
+ } query;
+
+ int num_z_pipes;
};
#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 9769ff53994..d524d602998 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -36,41 +36,67 @@
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_render.h"
+#include "r300_queryobj.h"
#include "r300_state.h"
#include "r300_tex.h"
+#include "radeon_buffer_objects.h"
+
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
-static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo)
+
+static int getTypeSize(GLenum type)
+{
+ switch (type) {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_index_buffer *ind_buf = &r300->ind_buf;
GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
- if (!mesa_ind_buf) {
- ind_buf->ptr = NULL;
- return;
- }
-
- ind_buf->count = mesa_ind_buf->count;
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
- bo[*nr_bo] = mesa_ind_buf->obj;
- (*nr_bo)++;
ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
GLubyte *in = (GLubyte *)src_ptr;
- GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1));
- int i;
- ind_buf->ptr = out;
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+ assert(r300->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
*out++ = in[i] | in[i + 1] << 16;
@@ -80,16 +106,16 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
*out++ = in[i];
}
- ind_buf->free_needed = GL_TRUE;
- ind_buf->is_32bit = GL_FALSE;
- } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) {
#if MESA_BIG_ENDIAN
+ } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLuint size;
GLushort *in = (GLushort *)src_ptr;
- GLuint *out = _mesa_malloc(sizeof(GLushort) *
- ((mesa_ind_buf->count + 1) & ~1));
- int i;
+ size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offet, size, 4);
- ind_buf->ptr = out;
+ assert(r300->ind_buf.bo->ptr != NULL)
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
*out++ = in[i] | in[i + 1] << 16;
@@ -98,42 +124,60 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
if (i < mesa_ind_buf->count) {
*out++ = in[i];
}
-
- ind_buf->free_needed = GL_TRUE;
-#else
- ind_buf->ptr = src_ptr;
- ind_buf->free_needed = GL_FALSE;
#endif
- ind_buf->is_32bit = GL_FALSE;
- } else {
- ind_buf->ptr = src_ptr;
- ind_buf->free_needed = GL_FALSE;
- ind_buf->is_32bit = GL_TRUE;
+ }
+
+ r300->ind_buf.is_32bit = GL_FALSE;
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
}
}
-static int getTypeSize(GLenum type)
+
+static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
{
- switch (type) {
- case GL_DOUBLE:
- return sizeof(GLdouble);
- case GL_FLOAT:
- return sizeof(GLfloat);
- case GL_INT:
- return sizeof(GLint);
- case GL_UNSIGNED_INT:
- return sizeof(GLuint);
- case GL_SHORT:
- return sizeof(GLshort);
- case GL_UNSIGNED_SHORT:
- return sizeof(GLushort);
- case GL_BYTE:
- return sizeof(GLbyte);
- case GL_UNSIGNED_BYTE:
- return sizeof(GLubyte);
- default:
- assert(0);
- return 0;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ r300->ind_buf.bo = NULL;
+ return;
+ }
+
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT) {
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) {
+#endif
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+ assert(r300->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+ _mesa_memcpy(dst_ptr, src_ptr, size);
+
+ r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ } else {
+ r300FixupIndexBuffer(ctx, mesa_ind_buf);
}
}
@@ -161,26 +205,118 @@ static int getTypeSize(GLenum type)
} \
} while (0)
-static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo)
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_vertex_buffer *vbuf = &r300->vbuf;
- struct vertex_attribute r300_attr;
- const void *src_ptr;
- GLenum type;
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
GLuint stride;
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ count = 1;
+
if (input->BufferObj->Name) {
if (!input->BufferObj->Pointer) {
- bo[*nr_bo] = input->BufferObj;
- (*nr_bo)++;
ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
- assert(input->BufferObj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
}
src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
- } else
+ } else {
src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32);
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+ fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type);
+ fprintf(stderr, "stride %d, components %d\n", stride, input->Size);
+ }
+
+ assert(src_ptr != NULL);
+
+ switch (input->Type) {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ if (!input->BufferObj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ attr->stride = dst_stride;
+}
+
+static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ struct vertex_attribute r300_attr;
+ GLenum type;
+ GLuint stride;
stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
@@ -189,62 +325,57 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
getTypeSize(input->Type) != 4 ||
#endif
stride < 4) {
- if (RADEON_DEBUG & DEBUG_FALLBACKS) {
- fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type);
- fprintf(stderr, "stride %d, components %d\n", stride, input->Size);
- }
-
- GLfloat *dst_ptr, *tmp;
-
- /* Convert value for first element only */
- if (input->StrideB == 0)
- count = 1;
-
- tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count);
-
- switch (input->Type) {
- case GL_DOUBLE:
- CONVERT(GLdouble, (GLfloat));
- break;
- case GL_UNSIGNED_INT:
- CONVERT(GLuint, UINT_TO_FLOAT);
- break;
- case GL_INT:
- CONVERT(GLint, INT_TO_FLOAT);
- break;
- case GL_UNSIGNED_SHORT:
- CONVERT(GLushort, USHORT_TO_FLOAT);
- break;
- case GL_SHORT:
- CONVERT(GLshort, SHORT_TO_FLOAT);
- break;
- case GL_UNSIGNED_BYTE:
- assert(input->Format != GL_BGRA);
- CONVERT(GLubyte, UBYTE_TO_FLOAT);
- break;
- case GL_BYTE:
- CONVERT(GLbyte, BYTE_TO_FLOAT);
- break;
- default:
- assert(0);
- break;
- }
type = GL_FLOAT;
- r300_attr.free_needed = GL_TRUE;
- r300_attr.data = tmp;
+
+ r300ConvertAttrib(ctx, count, input, &r300_attr);
if (input->StrideB == 0) {
r300_attr.stride = 0;
} else {
r300_attr.stride = sizeof(GLfloat) * input->Size;
}
r300_attr.dwords = input->Size;
+ r300_attr.is_named_bo = GL_FALSE;
} else {
type = input->Type;
- r300_attr.free_needed = GL_FALSE;
- r300_attr.data = (GLvoid *)src_ptr;
- r300_attr.stride = input->StrideB;
- r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
+ r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
+ if (input->BufferObj->Name) {
+ if (stride % 4 != 0) {
+ assert(((int) input->Ptr) % input->StrideB == 0);
+ r300AlignDataToDword(ctx, input, count, &r300_attr);
+ r300_attr.is_named_bo = GL_FALSE;
+ } else {
+ r300_attr.stride = input->StrideB;
+ r300_attr.bo_offset = (GLuint) input->Ptr;
+ r300_attr.bo = get_radeon_buffer_object(input->BufferObj)->bo;
+ r300_attr.is_named_bo = GL_TRUE;
+ }
+ } else {
+ int size;
+ uint32_t *dst;
+
+ if (input->StrideB == 0) {
+ size = getTypeSize(input->Type) * input->Size;
+ count = 1;
+ r300_attr.stride = 0;
+ } else {
+ size = getTypeSize(input->Type) * input->Size * count;
+ r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &r300_attr.bo, &r300_attr.bo_offset, size, 32);
+ assert(r300_attr.bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(r300_attr.bo->ptr, r300_attr.bo_offset);
+ switch (r300_attr.dwords) {
+ case 1: radeonEmitVec4(dst, input->Ptr, input->StrideB, count); break;
+ case 2: radeonEmitVec8(dst, input->Ptr, input->StrideB, count); break;
+ case 3: radeonEmitVec12(dst, input->Ptr, input->StrideB, count); break;
+ case 4: radeonEmitVec16(dst, input->Ptr, input->StrideB, count); break;
+ default: assert(0); break;
+ }
+
+ r300_attr.is_named_bo = GL_FALSE;
+ }
}
r300_attr.size = input->Size;
@@ -333,15 +464,15 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
++vbuf->num_attribs;
}
-static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo)
+static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_buffer *vbuf = &r300->vbuf;
-
+ int ret;
{
int i, tmp;
- tmp = r300->selected_vp->Base->Base.InputsRead;
+ tmp = r300->selected_vp->code.InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -351,7 +482,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
++i;
}
- r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo);
+ r300TranslateAttrib(ctx, i, count, arrays[i]);
tmp >>= 1;
++i;
@@ -366,38 +497,47 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
int i;
for (i = 0; i < vbuf->num_attribs; i++) {
- rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i],
- vbuf->attribs[i].data, vbuf->attribs[i].dwords,
- vbuf->attribs[i].stride, count);
+ struct radeon_aos *aos = &r300->radeon.tcl.aos[i];
+
+ aos->count = vbuf->attribs[i].stride == 0 ? 1 : count;
+ aos->stride = vbuf->attribs[i].stride / sizeof(float);
+ aos->offset = vbuf->attribs[i].bo_offset;
+ aos->components = vbuf->attribs[i].dwords;
+ aos->bo = vbuf->attribs[i].bo;
+
+ if (vbuf->attribs[i].is_named_bo) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0);
+ }
}
r300->radeon.tcl.aos_count = vbuf->num_attribs;
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, r300->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE);
}
}
-static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo)
+static void r300FreeData(GLcontext *ctx)
{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
{
- struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf;
int i;
- for (i = 0; i < vbuf->num_attribs; i++) {
- if (vbuf->attribs[i].free_needed)
- _mesa_free(vbuf->attribs[i].data);
+ for (i = 0; i < r300->vbuf.num_attribs; i++) {
+ if (!r300->vbuf.attribs[i].is_named_bo) {
+ radeon_bo_unref(r300->vbuf.attribs[i].bo);
+ }
+ r300->radeon.tcl.aos[i].bo = NULL;
}
}
{
- struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf;
- if (ind_buf->free_needed)
- _mesa_free(ind_buf->ptr);
- }
-
- {
- int i;
-
- for (i = 0; i < nr_bo; ++i) {
- ctx->Driver.UnmapBuffer(ctx, 0, bo[i]);
+ if (r300->ind_buf.bo != NULL) {
+ radeon_bo_unref(r300->ind_buf.bo);
}
}
}
@@ -411,8 +551,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
GLuint max_index )
{
struct r300_context *r300 = R300_CONTEXT(ctx);
- struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1];
- GLuint i, nr_bo = 0;
+ GLuint i;
if (ctx->NewState)
_mesa_update_state( ctx );
@@ -424,35 +563,37 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
- r300FixupIndexBuffer(ctx, ib, bo, &nr_bo);
-
/* ensure we have the cmd buf space in advance to cover
* the state + DMA AOS pointers */
rcommonEnsureCmdBufSpace(&r300->radeon,
- r300->radeon.hw.max_state_size + (50*sizeof(int)),
- __FUNCTION__);
+ r300->radeon.hw.max_state_size + (60*sizeof(int)),
+ __FUNCTION__);
+
+ r300SetupIndexBuffer(ctx, ib);
- r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo);
+ r300SetVertexFormat(ctx, arrays, max_index + 1);
if (r300->fallback)
return GL_FALSE;
- r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten);
+ r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten);
r300UpdateShaderStates(r300);
r300EmitCacheFlush(r300);
radeonEmitState(&r300->radeon);
+ r300EmitQueryBegin(ctx);
+
for (i = 0; i < nr_prims; ++i) {
r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode);
}
r300EmitCacheFlush(r300);
- radeonReleaseArrays(ctx, ~0);
+ r300EmitQueryEnd(ctx);
- r300FreeData(ctx, bo, nr_bo);
+ r300FreeData(ctx);
return GL_TRUE;
}
@@ -462,28 +603,23 @@ static void r300DrawPrims(GLcontext *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index)
{
- struct split_limits limits;
GLboolean retval;
- if (ib)
- limits.max_verts = 0xffffffff;
- else
- limits.max_verts = 65535;
-
- limits.max_indices = 65535;
- limits.max_vb_size = 1024*1024;
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
return;
}
- if ((ib && ib->count > 65535)) {
- vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits);
- return;
- }
/* Make an attempt at drawing */
retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
index feb3370f372..07e62230874 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -124,41 +124,6 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
return ret;
}
-GLboolean r300EmitArrays(GLcontext * ctx)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_vertex_buffer *vbuf = &r300->vbuf;
- GLuint InputsRead, OutputsWritten;
-
- r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
-
- r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS);
- if (r300->fallback & R300_RASTER_FALLBACK_MASK)
- return GL_FALSE;
-
- {
- struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb;
- GLuint attr, i;
-
- for (i = 0; i < vbuf->num_attribs; i++) {
- attr = vbuf->attribs[i].element;
- rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data,
- mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count);
- }
-
- r300->radeon.tcl.aos_count = vbuf->num_attribs;
-
- /* Fill index buffer info */
- r300->ind_buf.ptr = mesa_vb->Elts;
- r300->ind_buf.is_32bit = GL_TRUE;
- r300->ind_buf.free_needed = GL_FALSE;
- }
-
- r300SetupVAP(ctx, InputsRead, OutputsWritten);
-
- return GL_TRUE;
-}
-
void r300EmitCacheFlush(r300ContextPtr rmesa)
{
BATCH_LOCALS(&rmesa->radeon);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
index 3f8c60ffae1..8e57e354d1d 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.h
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -104,7 +104,7 @@ static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
return cmd.u;
}
-static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
unsigned short count)
{
drm_r300_cmd_header_t cmd;
@@ -216,8 +216,6 @@ void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
}
}
-extern GLboolean r300EmitArrays(GLcontext * ctx);
-
extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
deleted file mode 100644
index 55c1cfe6317..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "r300_fragprog.h"
-
-#include "shader/prog_parameter.h"
-
-#include "r300_context.h"
-#include "r300_fragprog_swizzle.h"
-
-static void reset_srcreg(struct prog_src_register* reg)
-{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
-}
-
-static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
-{
- gl_state_index fail_value_tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
- };
- struct prog_src_register reg = { 0, };
-
- fail_value_tokens[2] = tmu;
- reg.File = PROGRAM_STATE_VAR;
- reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
- reg.Swizzle = SWIZZLE_WWWW;
- return reg;
-}
-
-/**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- * - premultiply texture coordinates for RECT
- * - extract operand swizzles
- * - introduce a temporary register when write masks are needed
- *
- * \todo If/when r5xx uses the radeon_program architecture, this can probably
- * be reused.
- */
-GLboolean r300_transform_TEX(
- struct radeon_transform_context *t,
- struct prog_instruction* orig_inst, void* data)
-{
- struct r300_fragment_program_compiler *compiler =
- (struct r300_fragment_program_compiler*)data;
- struct prog_instruction inst = *orig_inst;
- struct prog_instruction* tgt;
- GLboolean destredirect = GL_FALSE;
-
- if (inst.Opcode != OPCODE_TEX &&
- inst.Opcode != OPCODE_TXB &&
- inst.Opcode != OPCODE_TXP &&
- inst.Opcode != OPCODE_KIL)
- return GL_FALSE;
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
-
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = inst.DstReg;
- if (comparefunc == GL_ALWAYS) {
- tgt->SrcReg[0].File = PROGRAM_BUILTIN;
- tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
- } else {
- tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
- }
- return GL_TRUE;
- }
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = radeonFindFreeTemporary(t);
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- }
-
-
- /* Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- */
- if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
-
- int tempreg = radeonFindFreeTemporary(t);
- int factor_index;
-
- tokens[2] = inst.TexSrcUnit;
- factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
-
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MUL;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tempreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
- tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
- tgt->SrcReg[1].Index = factor_index;
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tempreg;
- }
-
- if (inst.Opcode != OPCODE_KIL) {
- if (inst.DstReg.File != PROGRAM_TEMPORARY ||
- inst.DstReg.WriteMask != WRITEMASK_XYZW) {
- int tempreg = radeonFindFreeTemporary(t);
-
- inst.DstReg.File = PROGRAM_TEMPORARY;
- inst.DstReg.Index = tempreg;
- inst.DstReg.WriteMask = WRITEMASK_XYZW;
- destredirect = GL_TRUE;
- } else if (inst.SaturateMode) {
- destredirect = GL_TRUE;
- }
- }
-
- if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
- int tmpreg = radeonFindFreeTemporary(t);
- tgt = radeonAppendInstructions(t->Program, 1);
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg.File = PROGRAM_TEMPORARY;
- tgt->DstReg.Index = tmpreg;
- tgt->SrcReg[0] = inst.SrcReg[0];
-
- reset_srcreg(&inst.SrcReg[0]);
- inst.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst.SrcReg[0].Index = tmpreg;
- }
-
- tgt = radeonAppendInstructions(t->Program, 1);
- _mesa_copy_instructions(tgt, &inst, 1);
-
- if (inst.Opcode != OPCODE_KIL &&
- t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
- int rcptemp = radeonFindFreeTemporary(t);
- int pass, fail;
-
- tgt = radeonAppendInstructions(t->Program, 3);
-
- tgt[0].Opcode = OPCODE_RCP;
- tgt[0].DstReg.File = PROGRAM_TEMPORARY;
- tgt[0].DstReg.Index = rcptemp;
- tgt[0].DstReg.WriteMask = WRITEMASK_W;
- tgt[0].SrcReg[0] = inst.SrcReg[0];
- tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- tgt[1].Opcode = OPCODE_MAD;
- tgt[1].DstReg = inst.DstReg;
- tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
- tgt[1].SrcReg[0] = inst.SrcReg[0];
- tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[1].Index = rcptemp;
- tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
- tgt[1].SrcReg[2].Index = inst.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW;
- else
- tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW;
-
- tgt[2].Opcode = OPCODE_CMP;
- tgt[2].DstReg = orig_inst->DstReg;
- tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
-
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
- pass = 1;
- fail = 2;
- } else {
- pass = 2;
- fail = 1;
- }
-
- tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
- tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
- tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
- } else if (destredirect) {
- tgt = radeonAppendInstructions(t->Program, 1);
-
- tgt->Opcode = OPCODE_MOV;
- tgt->DstReg = orig_inst->DstReg;
- tgt->SaturateMode = inst.SaturateMode;
- tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
- tgt->SrcReg[0].Index = inst.DstReg.Index;
- }
-
- return GL_TRUE;
-}
-
-/* just some random things... */
-void r300FragmentProgramDump(union rX00_fragment_program_code *c)
-{
- struct r300_fragment_program_code *code = &c->r300;
- int n, i, j;
- static int pc = 0;
-
- fprintf(stderr, "pc=%d*************************************\n", pc++);
-
- fprintf(stderr, "Hardware program\n");
- fprintf(stderr, "----------------\n");
-
- for (n = 0; n < (code->cur_node + 1); n++) {
- fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d, flags: %08x\n", n,
- code->node[n].alu_offset,
- code->node[n].tex_offset,
- code->node[n].alu_end, code->node[n].tex_end,
- code->node[n].flags);
-
- if (n > 0 || code->first_node_has_tex) {
- fprintf(stderr, " TEX:\n");
- for (i = code->node[n].tex_offset;
- i <= code->node[n].tex_offset + code->node[n].tex_end;
- ++i) {
- const char *instr;
-
- switch ((code->tex.
- inst[i] >> R300_TEX_INST_SHIFT) &
- 15) {
- case R300_TEX_OP_LD:
- instr = "TEX";
- break;
- case R300_TEX_OP_KIL:
- instr = "KIL";
- break;
- case R300_TEX_OP_TXP:
- instr = "TXP";
- break;
- case R300_TEX_OP_TXB:
- instr = "TXB";
- break;
- default:
- instr = "UNKNOWN";
- }
-
- fprintf(stderr,
- " %s t%i, %c%i, texture[%i] (%08x)\n",
- instr,
- (code->tex.
- inst[i] >> R300_DST_ADDR_SHIFT) & 31,
- 't',
- (code->tex.
- inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
- (code->tex.
- inst[i] & R300_TEX_ID_MASK) >>
- R300_TEX_ID_SHIFT,
- code->tex.inst[i]);
- }
- }
-
- for (i = code->node[n].alu_offset;
- i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
- char srcc[3][10], dstc[20];
- char srca[3][10], dsta[20];
- char argc[3][20];
- char arga[3][20];
- char flags[5], tmp[10];
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst1 >> (j * 6);
- int rega = code->alu.inst[i].inst3 >> (j * 6);
-
- sprintf(srcc[j], "%c%i",
- (regc & 32) ? 'c' : 't', regc & 31);
- sprintf(srca[j], "%c%i",
- (rega & 32) ? 'c' : 't', rega & 31);
- }
-
- dstc[0] = 0;
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(dstc, "t%i.%s ",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- }
- sprintf(flags, "%s%s%s",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
- (code->alu.inst[i].
- inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
- if (flags[0] != 0) {
- sprintf(tmp, "o%i.%s",
- (code->alu.inst[i].
- inst1 >> R300_ALU_DSTC_SHIFT) & 31,
- flags);
- strcat(dstc, tmp);
- }
-
- dsta[0] = 0;
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
- sprintf(dsta, "t%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
- sprintf(tmp, "o%i.w ",
- (code->alu.inst[i].
- inst3 >> R300_ALU_DSTA_SHIFT) & 31);
- strcat(dsta, tmp);
- }
- if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
- strcat(dsta, "Z");
- }
-
- fprintf(stderr,
- "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
- " w: %3s %3s %3s -> %-20s (%08x)\n", i,
- srcc[0], srcc[1], srcc[2], dstc,
- code->alu.inst[i].inst1, srca[0], srca[1],
- srca[2], dsta, code->alu.inst[i].inst3);
-
- for (j = 0; j < 3; ++j) {
- int regc = code->alu.inst[i].inst0 >> (j * 7);
- int rega = code->alu.inst[i].inst2 >> (j * 7);
- int d;
- char buf[20];
-
- d = regc & 31;
- if (d < 12) {
- switch (d % 4) {
- case R300_ALU_ARGC_SRC0C_XYZ:
- sprintf(buf, "%s.xyz",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_XXX:
- sprintf(buf, "%s.xxx",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_YYY:
- sprintf(buf, "%s.yyy",
- srcc[d / 4]);
- break;
- case R300_ALU_ARGC_SRC0C_ZZZ:
- sprintf(buf, "%s.zzz",
- srcc[d / 4]);
- break;
- }
- } else if (d < 15) {
- sprintf(buf, "%s.www", srca[d - 12]);
- } else if (d == 20) {
- sprintf(buf, "0.0");
- } else if (d == 21) {
- sprintf(buf, "1.0");
- } else if (d == 22) {
- sprintf(buf, "0.5");
- } else if (d >= 23 && d < 32) {
- d -= 23;
- switch (d / 3) {
- case 0:
- sprintf(buf, "%s.yzx",
- srcc[d % 3]);
- break;
- case 1:
- sprintf(buf, "%s.zxy",
- srcc[d % 3]);
- break;
- case 2:
- sprintf(buf, "%s.Wzy",
- srcc[d % 3]);
- break;
- }
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(argc[j], "%s%s%s%s",
- (regc & 32) ? "-" : "",
- (regc & 64) ? "|" : "",
- buf, (regc & 64) ? "|" : "");
-
- d = rega & 31;
- if (d < 9) {
- sprintf(buf, "%s.%c", srcc[d / 3],
- 'x' + (char)(d % 3));
- } else if (d < 12) {
- sprintf(buf, "%s.w", srca[d - 9]);
- } else if (d == 16) {
- sprintf(buf, "0.0");
- } else if (d == 17) {
- sprintf(buf, "1.0");
- } else if (d == 18) {
- sprintf(buf, "0.5");
- } else {
- sprintf(buf, "%i", d);
- }
-
- sprintf(arga[j], "%s%s%s%s",
- (rega & 32) ? "-" : "",
- (rega & 64) ? "|" : "",
- buf, (rega & 64) ? "|" : "");
- }
-
- fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
- " w: %8s %8s %8s op: %08x\n",
- argc[0], argc[1], argc[2],
- code->alu.inst[i].inst0, arga[0], arga[1],
- arga[2], code->alu.inst[i].inst2);
- }
- }
-}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
deleted file mode 100644
index 5ce6f33cee7..00000000000
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2005 Ben Skeggs.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/*
- * Authors:
- * Ben Skeggs <darktama@iinet.net.au>
- * Jerome Glisse <j.glisse@gmail.com>
- */
-#ifndef __R300_FRAGPROG_H_
-#define __R300_FRAGPROG_H_
-
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
-#include "r300_context.h"
-#include "radeon_program.h"
-
-#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
-#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
-
-#if 1
-
-/**
- * Fragment program helper macros
- */
-
-/* Produce unshifted source selectors */
-#define FP_TMP(idx) (idx)
-#define FP_CONST(idx) ((idx) | (1 << 5))
-
-/* Produce source/dest selector dword */
-#define FP_SELC_MASK_NO 0
-#define FP_SELC_MASK_X 1
-#define FP_SELC_MASK_Y 2
-#define FP_SELC_MASK_XY 3
-#define FP_SELC_MASK_Z 4
-#define FP_SELC_MASK_XZ 5
-#define FP_SELC_MASK_YZ 6
-#define FP_SELC_MASK_XYZ 7
-
-#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTC_SHIFT) | \
- (FP_SELC_MASK_##regmask << 23) | \
- (FP_SELC_MASK_##outmask << 26) | \
- ((src0) << R300_ALU_SRC0C_SHIFT) | \
- ((src1) << R300_ALU_SRC1C_SHIFT) | \
- ((src2) << R300_ALU_SRC2C_SHIFT))
-
-#define FP_SELA_MASK_NO 0
-#define FP_SELA_MASK_W 1
-
-#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
- (((destidx) << R300_ALU_DSTA_SHIFT) | \
- (FP_SELA_MASK_##regmask << 23) | \
- (FP_SELA_MASK_##outmask << 24) | \
- ((src0) << R300_ALU_SRC0A_SHIFT) | \
- ((src1) << R300_ALU_SRC1A_SHIFT) | \
- ((src2) << R300_ALU_SRC2A_SHIFT))
-
-/* Produce unshifted argument selectors */
-#define FP_ARGC(source) R300_ALU_ARGC_##source
-#define FP_ARGA(source) R300_ALU_ARGA_##source
-#define FP_ABS(arg) ((arg) | (1 << 6))
-#define FP_NEG(arg) ((arg) ^ (1 << 5))
-
-/* Produce instruction dword */
-#define FP_INSTRC(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTC_##opcode | \
- ((arg0) << R300_ALU_ARG0C_SHIFT) | \
- ((arg1) << R300_ALU_ARG1C_SHIFT) | \
- ((arg2) << R300_ALU_ARG2C_SHIFT))
-
-#define FP_INSTRA(opcode,arg0,arg1,arg2) \
- (R300_ALU_OUTA_##opcode | \
- ((arg0) << R300_ALU_ARG0A_SHIFT) | \
- ((arg1) << R300_ALU_ARG1A_SHIFT) | \
- ((arg2) << R300_ALU_ARG2A_SHIFT))
-
-#endif
-
-extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
-
-extern void r300FragmentProgramDump(union rX00_fragment_program_code *c);
-
-extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data);
-
-#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index f5c4c0f4a0e..6674efc5bca 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -42,21 +42,51 @@
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
+#include "compiler/radeon_compiler.h"
+
#include "r300_state.h"
-#include "r300_fragprog.h"
-#include "r300_fragprog_swizzle.h"
-#include "r500_fragprog.h"
-#include "radeon_program.h"
-#include "radeon_program_alu.h"
-static void nqssadce_init(struct nqssadce_state* s)
+static GLuint build_dtm(GLuint depthmode)
+{
+ switch(depthmode) {
+ default:
+ case GL_LUMINANCE: return 0;
+ case GL_INTENSITY: return 1;
+ case GL_ALPHA: return 2;
+ }
+}
+
+static GLuint build_func(GLuint comparefunc)
{
- s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
- s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
+ return comparefunc - GL_NEVER;
}
/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program.
+ */
+static void build_state(
+ r300ContextPtr r300,
+ struct gl_fragment_program *fp,
+ struct r300_fragment_program_external_state *state)
+{
+ int unit;
+
+ _mesa_bzero(state, sizeof(*state));
+
+ for(unit = 0; unit < 16; ++unit) {
+ if (fp->Base.ShadowSamplers & (1 << unit)) {
+ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
+
+ state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
+ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
+ }
+ }
+}
+
+
+/**
* Transform the program to support fragment.position.
*
* Introduce a small fragment at the start of the program that will be
@@ -65,104 +95,26 @@ static void nqssadce_init(struct nqssadce_state* s)
* to read from a newly allocated temporary.
*
*/
-static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
- GLuint InputsRead = compiler->fp->Base->InputsRead;
+ int i;
- if (!(InputsRead & FRAG_BIT_WPOS)) {
- compiler->fp->wpos_attr = FRAG_ATTRIB_MAX;
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
return;
}
- static gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
- };
- struct prog_instruction *fpi;
- GLuint window_index;
- int i = 0;
-
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
- InputsRead |= 1 << i;
- compiler->fp->Base->InputsRead = InputsRead;
- compiler->fp->wpos_attr = i;
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ fp->wpos_attr = i;
break;
}
}
- GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
-
- _mesa_insert_instructions(compiler->program, 0, 3);
- fpi = compiler->program->Instructions;
- i = 0;
-
- /* perspective divide */
- fpi[i].Opcode = OPCODE_RCP;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_W;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
- i++;
-
- fpi[i].Opcode = OPCODE_MUL;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
- fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[1].Index = tempregi;
- fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
- i++;
-
- /* viewport transformation */
- window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
-
- fpi[i].Opcode = OPCODE_MAD;
-
- fpi[i].DstReg.File = PROGRAM_TEMPORARY;
- fpi[i].DstReg.Index = tempregi;
- fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
- fpi[i].DstReg.CondMask = COND_TR;
-
- fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[0].Index = tempregi;
- fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[1].Index = window_index;
- fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
-
- fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
- fpi[i].SrcReg[2].Index = window_index;
- fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- i++;
-
- for (; i < compiler->program->NumInstructions; ++i) {
- int reg;
- for (reg = 0; reg < 3; reg++) {
- if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
- fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
- fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
- fpi[i].SrcReg[reg].Index = tempregi;
- }
- }
- }
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr);
}
-
/**
* Rewrite fragment.fogcoord to use a texture coordinate slot.
* Note that fogcoord is forced into an X001 pattern, and this enforcement
@@ -170,205 +122,117 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
*
* See also the counterpart rewriting for vertex programs.
*/
-static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
- struct r300_fragment_program *fp = compiler->fp;
- GLuint InputsRead;
+ struct prog_src_register src;
int i;
- InputsRead = fp->Base->InputsRead;
-
- if (!(InputsRead & FRAG_BIT_FOGC)) {
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
fp->fog_attr = FRAG_ATTRIB_MAX;
return;
}
for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
{
- if (!(InputsRead & (1 << i))) {
- InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
- InputsRead |= 1 << i;
- fp->Base->InputsRead = InputsRead;
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
fp->fog_attr = i;
break;
}
}
- {
- struct prog_instruction *inst;
-
- inst = compiler->program->Instructions;
- while (inst->Opcode != OPCODE_END) {
- const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
- for (i = 0; i < src_regs; ++i) {
- if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
- inst->SrcReg[i].Index = fp->fog_attr;
- inst->SrcReg[i].Swizzle = combine_swizzles(
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
- inst->SrcReg[i].Swizzle);
- }
- }
- ++inst;
- }
- }
-}
-
-static GLuint build_dtm(GLuint depthmode)
-{
- switch(depthmode) {
- default:
- case GL_LUMINANCE: return 0;
- case GL_INTENSITY: return 1;
- case GL_ALPHA: return 2;
- }
+ memset(&src, 0, sizeof(src));
+ src.File = PROGRAM_INPUT;
+ src.Index = fp->fog_attr;
+ src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
}
-static GLuint build_func(GLuint comparefunc)
-{
- return comparefunc - GL_NEVER;
-}
/**
- * Collect all external state that is relevant for compiling the given
- * fragment program.
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
*/
-static void build_state(
- r300ContextPtr r300,
- struct gl_fragment_program *fp,
- struct r300_fragment_program_external_state *state)
+static void allocate_hw_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
{
- int unit;
-
- _mesa_bzero(state, sizeof(*state));
-
- for(unit = 0; unit < 16; ++unit) {
- if (fp->Base.ShadowSamplers & (1 << unit)) {
- struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
-
- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
- state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
- }
+ GLuint InputsRead = c->Base.Program.InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ allocate(mydata, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ allocate(mydata, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Texcoords */
+ for (i = 0; i < 8; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++);
}
-}
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
-static void rewrite_depth_out(struct gl_program *prog)
-{
- struct prog_instruction *inst;
+ /* Fogcoords treated as a texcoord */
+ if (InputsRead & FRAG_BIT_FOGC)
+ allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++);
+ InputsRead &= ~FRAG_BIT_FOGC;
- for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
- if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
- continue;
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
- } else {
- inst->DstReg.WriteMask = 0;
- continue;
- }
-
- switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
- }
- }
+ /* Anything else */
+ if (InputsRead)
+ rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead);
}
-void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
+
+static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_compiler compiler;
- compiler.r300 = r300;
- compiler.fp = fp;
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
+
compiler.code = &fp->code;
- compiler.program = fp->Base;
+ compiler.state = fp->state;
+ compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ compiler.OutputColor = FRAG_RESULT_COLOR;
+ compiler.AllocateHwInputs = &allocate_hw_inputs;
- if (RADEON_DEBUG & DEBUG_PIXEL) {
+ if (compiler.Base.Debug) {
fflush(stdout);
_mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(compiler.program);
+ _mesa_print_program(&cont->Base.Base);
fflush(stdout);
}
- insert_WPOS_trailer(&compiler);
-
- rewriteFog(&compiler);
-
- rewrite_depth_out(compiler.program);
-
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_program_transformation transformations[] = {
- { &r500_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformDeriv, 0 },
- { &radeonTransformTrigScale, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 4, transformations);
- } else {
- struct radeon_program_transformation transformations[] = {
- { &r300_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformTrigSimple, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 3, transformations);
- }
+ rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
-
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- }
+ insert_WPOS_trailer(&compiler, fp);
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
+ rewriteFog(&compiler, fp);
- if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
- fp->error = GL_TRUE;
+ r3xx_compile_fragment_program(&compiler);
+ fp->error = compiler.Base.Error;
- fp->translated = GL_TRUE;
+ fp->InputsRead = compiler.Base.Program.InputsRead;
- if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
- r300->vtbl.FragmentProgramDump(&fp->code);
+ rc_destroy(&compiler.Base);
}
-struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_fragment_program_cont *fp_list;
@@ -389,11 +253,11 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
fp = _mesa_calloc(sizeof(struct r300_fragment_program));
fp->state = state;
- fp->translated = GL_FALSE;
- fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base);
fp->next = fp_list->progs;
fp_list->progs = fp;
+ translate_fragment_program(ctx, fp_list, fp);
+
return r300->selected_fp = fp;
}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
index 5e103ee4086..3d64c08cee9 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -32,8 +32,6 @@
#include "r300_context.h"
-extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp);
-
-struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx);
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index ddabd539925..da801f42e4c 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
+#include "main/simple_list.h"
#include "swrast/swrast.h"
#include "radeon_common.h"
@@ -55,8 +56,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_vertprog.h"
#include "radeon_reg.h"
#include "r300_emit.h"
-#include "r300_fragprog.h"
#include "r300_context.h"
+#include "r300_queryobj.h"
#include "vblank.h"
@@ -66,6 +67,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define CLEARBUFFER_DEPTH 0x2
#define CLEARBUFFER_STENCIL 0x4
+#if 1
+
+/**
+ * Fragment program helper macros
+ */
+
+/* Produce unshifted source selectors */
+#define FP_TMP(idx) (idx)
+#define FP_CONST(idx) ((idx) | (1 << 5))
+
+/* Produce source/dest selector dword */
+#define FP_SELC_MASK_NO 0
+#define FP_SELC_MASK_X 1
+#define FP_SELC_MASK_Y 2
+#define FP_SELC_MASK_XY 3
+#define FP_SELC_MASK_Z 4
+#define FP_SELC_MASK_XZ 5
+#define FP_SELC_MASK_YZ 6
+#define FP_SELC_MASK_XYZ 7
+
+#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTC_SHIFT) | \
+ (FP_SELC_MASK_##regmask << 23) | \
+ (FP_SELC_MASK_##outmask << 26) | \
+ ((src0) << R300_ALU_SRC0C_SHIFT) | \
+ ((src1) << R300_ALU_SRC1C_SHIFT) | \
+ ((src2) << R300_ALU_SRC2C_SHIFT))
+
+#define FP_SELA_MASK_NO 0
+#define FP_SELA_MASK_W 1
+
+#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
+ (((destidx) << R300_ALU_DSTA_SHIFT) | \
+ (FP_SELA_MASK_##regmask << 23) | \
+ (FP_SELA_MASK_##outmask << 24) | \
+ ((src0) << R300_ALU_SRC0A_SHIFT) | \
+ ((src1) << R300_ALU_SRC1A_SHIFT) | \
+ ((src2) << R300_ALU_SRC2A_SHIFT))
+
+/* Produce unshifted argument selectors */
+#define FP_ARGC(source) R300_ALU_ARGC_##source
+#define FP_ARGA(source) R300_ALU_ARGA_##source
+#define FP_ABS(arg) ((arg) | (1 << 6))
+#define FP_NEG(arg) ((arg) ^ (1 << 5))
+
+/* Produce instruction dword */
+#define FP_INSTRC(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTC_##opcode | \
+ ((arg0) << R300_ALU_ARG0C_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1C_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2C_SHIFT))
+
+#define FP_INSTRA(opcode,arg0,arg1,arg2) \
+ (R300_ALU_OUTA_##opcode | \
+ ((arg0) << R300_ALU_ARG0A_SHIFT) | \
+ ((arg1) << R300_ALU_ARG1A_SHIFT) | \
+ ((arg2) << R300_ALU_ARG2A_SHIFT))
+
+#endif
+
static void r300EmitClearState(GLcontext * ctx);
static void r300ClearBuffer(r300ContextPtr r300, int flags,
@@ -109,18 +170,21 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags,
}
#if 1
if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
- assert(rrbd != 0);
- cbpitch = (rrbd->pitch / rrbd->cpp);
+ uint32_t zbpitch = (rrbd->pitch / rrbd->cpp);
if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
- cbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
}
if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
- cbpitch |= R300_DEPTHMICROTILE_TILED;
+ zbpitch |= R300_DEPTHMICROTILE_TILED;
}
BEGIN_BATCH_NO_AUTOSTATE(6);
OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(zbpitch);
+ else
+ OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
}
#endif
@@ -508,12 +572,12 @@ static void r300EmitClearState(GLcontext * ctx)
0, 0xf, PVS_DST_REG_OUT);
vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[4] = 0x0;
vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
@@ -521,13 +585,12 @@ static void r300EmitClearState(GLcontext * ctx)
vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-
- VSF_FLAG_NONE);
+ NEGATE_NONE);
vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
- PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
+ PVS_SRC_REG_INPUT, NEGATE_NONE);
vpu.cmd[8] = 0x0;
r300->vap_flush_needed = GL_TRUE;
@@ -546,7 +609,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags)
/* Make sure it fits there. */
radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
-
+
if (flags & BUFFER_BIT_COLOR0) {
rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
@@ -692,10 +755,19 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
}
}
+static void r300Flush(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ radeonFlush(ctx);
+
+ make_empty_list(&r300->query.not_flushed_head);
+}
+
void r300InitIoctlFuncs(struct dd_function_table *functions)
{
functions->Clear = r300Clear;
functions->Finish = radeonFinish;
- functions->Flush = radeonFlush;
+ functions->Flush = r300Flush;
}
diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.c b/src/mesa/drivers/dri/r300/r300_queryobj.c
new file mode 100644
index 00000000000..df1fb32ee78
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_queryobj.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+
+#include "r300_queryobj.h"
+#include "r300_emit.h"
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+#define DDEBUG 0
+
+#define PAGE_SIZE 4096
+
+static void r300QueryGetResult(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct r300_query_object *query = (struct r300_query_object *)q;
+ uint32_t *result;
+ int i;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d, result %d\n", __FUNCTION__, query->Base.Id, (int) query->Base.Result);
+
+ radeon_bo_map(query->bo, GL_FALSE);
+
+ result = query->bo->ptr;
+
+ query->Base.Result = 0;
+ for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+ query->Base.Result += result[i];
+ if (DDEBUG) fprintf(stderr, "result[%d] = %d\n", i, result[i]);
+ }
+
+ radeon_bo_unmap(query->bo);
+}
+
+static struct gl_query_object * r300NewQueryObject(GLcontext *ctx, GLuint id)
+{
+ struct r300_query_object *query;
+
+ query = _mesa_calloc(sizeof(struct r300_query_object));
+
+ query->Base.Id = id;
+ query->Base.Result = 0;
+ query->Base.Active = GL_FALSE;
+ query->Base.Ready = GL_TRUE;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id);
+
+ return &query->Base;
+}
+
+static void r300DeleteQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct r300_query_object *query = (struct r300_query_object *)q;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ if (query->bo) {
+ radeon_bo_unref(query->bo);
+ }
+
+ _mesa_free(query);
+}
+
+static void r300BeginQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_query_object *query = (struct r300_query_object *)q;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ assert(r300->query.current == NULL);
+
+ if (!query->bo) {
+ query->bo = radeon_bo_open(r300->radeon.radeonScreen->bom, 0, PAGE_SIZE, PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ query->curr_offset = 0;
+
+ r300->query.current = query;
+ insert_at_tail(&r300->query.not_flushed_head, query);
+}
+
+static void r300EndQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ r300EmitQueryEnd(ctx);
+
+ r300->query.current = NULL;
+}
+
+static void r300WaitQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_query_object *tmp, *query = (struct r300_query_object *)q;
+
+ /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
+ {
+ GLboolean found = GL_FALSE;
+ foreach(tmp, &r300->query.not_flushed_head) {
+ if (tmp == query) {
+ found = GL_TRUE;
+ break;
+ }
+ }
+
+ if (found)
+ ctx->Driver.Flush(ctx);
+ }
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);
+
+ r300QueryGetResult(ctx, q);
+
+ query->Base.Ready = GL_TRUE;
+}
+
+
+/**
+ * TODO:
+ * should check if bo is idle, bo there's no interface to do it
+ * just wait for result now
+ */
+static void r300CheckQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+ r300WaitQuery(ctx, q);
+}
+
+void r300EmitQueryBegin(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_query_object *query = r300->query.current;
+ BATCH_LOCALS(&r300->radeon);
+
+ if (!query || query->emitted_begin)
+ return;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id);
+
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0);
+ END_BATCH();
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0);
+ END_BATCH();
+ }
+
+ query->emitted_begin = GL_TRUE;
+}
+
+void r300EmitQueryEnd(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_query_object *query = r300->query.current;
+ BATCH_LOCALS(&r300->radeon);
+
+ if (!query || !query->emitted_begin)
+ return;
+
+ if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset);
+
+ radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
+ query->bo,
+ 0, RADEON_GEM_DOMAIN_GTT);
+
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ BEGIN_BATCH_NO_AUTOSTATE(14);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_BATCH();
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->num_z_pipes + 2);
+ switch (r300->num_z_pipes) {
+ case 4:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 3:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 2:
+ if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ } else {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1);
+ }
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 1:
+ default:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ break;
+ }
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ END_BATCH();
+ }
+
+ query->curr_offset += r300->num_z_pipes * sizeof(uint32_t);
+ assert(query->curr_offset < PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+void r300InitQueryObjFunctions(struct dd_function_table *functions)
+{
+ functions->NewQueryObject = r300NewQueryObject;
+ functions->DeleteQuery = r300DeleteQuery;
+ functions->BeginQuery = r300BeginQuery;
+ functions->EndQuery = r300EndQuery;
+ functions->CheckQuery = r300CheckQuery;
+ functions->WaitQuery = r300WaitQuery;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.h b/src/mesa/drivers/dri/r300/r300_queryobj.h
new file mode 100644
index 00000000000..f301f0b1133
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_queryobj.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+
+#include "main/imports.h"
+#include "r300_context.h"
+
+extern void r300EmitQueryBegin(GLcontext *ctx);
+extern void r300EmitQueryEnd(GLcontext *ctx);
+
+extern void r300InitQueryObjFunctions(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 357c600af97..39b4b61a105 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1128,6 +1128,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* SU Depth Offset value */
#define R300_SU_DEPTH_OFFSET 0x42c4
+#define R300_SU_REG_DEST 0x42c8
+# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
+# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
+# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
+# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
+# define R300_RASTER_PIPE_SELECT_ALL 0xf
+
/* BEGIN: Rasterization / Interpolators - many guesses */
@@ -2014,6 +2021,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R500_FG_ALPHA_VALUE 0x4be0
# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+#define RV530_FG_ZBREG_DEST 0x4be8
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0)
+
/* gap */
/* Fragment program parameters in 7.16 floating point */
@@ -2667,6 +2679,24 @@ enum {
PVS_SRC_ADDR_MODE_1_SHIFT = 32,
};
+
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+ (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
+ | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
+ | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
+ | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
+ | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
+ (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
+ | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
+ | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
+ | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
+ | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
+ | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
/*\}*/
/* BEGIN: Packet 3 commands */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 08d67b73edc..45330cda3c1 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -64,6 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
+#include "vbo/vbo_split.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
@@ -75,6 +76,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog_common.h"
+#include "r300_queryobj.h"
#include "r300_swtcl.h"
/**
@@ -172,64 +174,45 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
return num_verts - verts_off;
}
-static void r300EmitElts(GLcontext * ctx, unsigned long n_elts)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- void *out;
- GLuint size;
-
- size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3;
-
- radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
- &rmesa->radeon.tcl.elt_dma_offset, size, 4);
- radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
- out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
- memcpy(out, rmesa->ind_buf.ptr, size);
- radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
-}
-
-static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
{
BATCH_LOCALS(&rmesa->radeon);
+ int size;
+
+ /* offset is in indices */
+ BEGIN_BATCH(10);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+ if (rmesa->ind_buf.is_32bit) {
+ /* convert to bytes */
+ offset *= 4;
+ size = vertex_count;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+ } else {
+ /* convert to bytes */
+ offset *= 2;
+ size = (vertex_count + 1) >> 1;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type);
+ }
- r300_emit_scissor(rmesa->radeon.glCtx);
- if (vertex_count > 0) {
- int size;
-
- BEGIN_BATCH(10);
- OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
- if (rmesa->ind_buf.is_32bit) {
- size = vertex_count;
- OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
- ((vertex_count + 0) << 16) | type |
- R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
- } else {
- size = (vertex_count + 1) >> 1;
- OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
- ((vertex_count + 0) << 16) | type);
- }
-
- if (!rmesa->radeon.radeonScreen->kernel_mm) {
- OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
- OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
- (R300_VAP_PORT_IDX0 >> 2));
- OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
- rmesa->radeon.tcl.elt_dma_bo,
- rmesa->radeon.tcl.elt_dma_offset,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- OUT_BATCH(size);
- } else {
- OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
- OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
- (R300_VAP_PORT_IDX0 >> 2));
- OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
- OUT_BATCH(size);
- radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
- rmesa->radeon.tcl.elt_dma_bo,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- }
- END_BATCH();
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ OUT_BATCH(size);
+ } else {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
+ OUT_BATCH(size);
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
}
+ END_BATCH();
}
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
@@ -340,7 +323,7 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
BATCH_LOCALS(&rmesa->radeon);
- r300_emit_scissor(rmesa->radeon.glCtx);
+ r300_emit_scissor(rmesa->radeon.glCtx);
BEGIN_BATCH(3);
OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
@@ -365,8 +348,16 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
*/
rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__);
- if (rmesa->ind_buf.ptr) {
- r300EmitElts(ctx, num_verts);
+ if (rmesa->ind_buf.bo) {
+ GLuint first, incr, offset = 0;
+
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65500) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+ return;
+ }
+
+
r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
if (rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(2);
@@ -374,45 +365,56 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
END_BATCH();
}
- r300FireEB(rmesa, num_verts, type);
- } else {
- r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
- r300FireAOS(rmesa, num_verts, type);
- }
- COMMIT_BATCH();
-}
-
-static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
- int i;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
- r300UpdateShaders(rmesa);
- r300EmitArrays(ctx);
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ int align;
+
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+
+ /* get alignment for IB correct */
+ if (nr != num_verts) {
+ do {
+ align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
+ if (align % 4)
+ nr -= incr;
+ } while(align % 4);
+ if (nr <= 0) {
+ WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
+ return;
+ }
+
+ }
+ r300FireEB(rmesa, nr, type, offset);
- r300UpdateShaderStates(rmesa);
+ num_verts -= nr;
+ offset += nr;
+ }
- r300EmitCacheFlush(rmesa);
- radeonEmitState(&rmesa->radeon);
+ } else {
+ GLuint first, incr, offset = 0;
- for (i = 0; i < vb->PrimitiveCount; i++) {
- GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
- GLuint start = vb->Primitive[i].start;
- GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- r300RunRenderPrimitive(ctx, start, end, prim);
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65500) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+ return;
+ }
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
+ r300FireAOS(rmesa, nr, type);
+ num_verts -= nr;
+ offset += nr;
+ }
}
-
- r300EmitCacheFlush(rmesa);
-
- radeonReleaseArrays(ctx, ~0);
+ COMMIT_BATCH();
}
-
static const char *getFallbackString(uint32_t bit)
{
switch (bit) {
@@ -449,7 +451,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
r300ContextPtr rmesa = R300_CONTEXT(ctx);
uint32_t old_fallback = rmesa->fallback;
static uint32_t fallback_warn = 0;
-
+
if (mode) {
if ((fallback_warn & bit) == 0) {
if (RADEON_DEBUG & DEBUG_FALLBACKS)
@@ -470,7 +472,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
/* update only if we change from no raster fallbacks to some raster fallbacks */
if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
-
+
radeon_firevertices(&rmesa->radeon);
rmesa->radeon.swtcl.RenderIndex = ~0;
_swsetup_Wakeup( ctx );
@@ -489,7 +491,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
/* update only if we have disabled all raster fallbacks */
if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
_swrast_flush( ctx );
-
+
tnl->Driver.Render.Start = r300RenderStart;
tnl->Driver.Render.Finish = r300RenderFinish;
tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
@@ -497,38 +499,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
-
+
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
}
}
-
-}
-static GLboolean r300RunNonTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
-{
- r300ContextPtr rmesa = R300_CONTEXT(ctx);
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- if (rmesa->fallback & R300_RASTER_FALLBACK_MASK)
- return GL_TRUE;
-
- if (rmesa->options.hw_tcl_enabled == GL_FALSE)
- return GL_TRUE;
-
- r300RunRender(ctx, stage);
-
- return GL_FALSE;
}
-
-const struct tnl_pipeline_stage _r300_render_stage = {
- "r300 Hardware Rasterization",
- NULL,
- NULL,
- NULL,
- NULL,
- r300RunNonTCLRender
-};
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 62228a3786e..a4f9db13ecf 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -38,7 +38,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont
while (fp) {
tmp = fp->next;
- _mesa_reference_program(ctx, &fp->Base, NULL);
+ rc_constants_destroy(&fp->code.constants);
_mesa_free(fp);
fp = tmp;
}
@@ -50,6 +50,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c
while (vp) {
tmp = vp->next;
+ rc_constants_destroy(&vp->code.constants);
_mesa_reference_vertprog(ctx, &vp->Base, NULL);
_mesa_free(vp);
vp = tmp;
@@ -122,15 +123,11 @@ static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
- struct r300_fragment_program *fp = r300SelectFragmentShader(ctx);
- if (!fp->translated)
- r300TranslateFragmentShader(ctx, fp);
+ struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx);
return !fp->error;
} else {
- struct r300_vertex_program *vp = r300SelectVertexShader(ctx);
- if (!vp->translated)
- r300TranslateVertexShader(vp);
+ struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx);
return !vp->error;
}
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 12fbf281d99..6081c337864 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_emit.h"
#include "r300_tex.h"
#include "r300_fragprog_common.h"
-#include "r300_fragprog.h"
-#include "r500_fragprog.h"
#include "r300_render.h"
#include "r300_vertprog.h"
@@ -458,7 +456,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
- return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth;
+ return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
@@ -473,6 +471,8 @@ static void r300SetEarlyZState(GLcontext * ctx)
topZ = R300_ZTOP_DISABLE;
else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill)
topZ = R300_ZTOP_DISABLE;
+ else if (r300->query.current)
+ topZ = R300_ZTOP_DISABLE;
if (topZ != r300->hw.zstencil_format.cmd[2]) {
/* Note: This completely reemits the stencil format.
@@ -1046,53 +1046,6 @@ void r300UpdateViewportOffset(GLcontext * ctx)
radeonUpdateScissor(ctx);
}
-static void
-r300FetchStateParameter(GLcontext * ctx,
- const gl_state_index state[STATE_LENGTH],
- GLfloat * value)
-{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
-
- switch (state[0]) {
- case STATE_INTERNAL:
- switch (state[1]) {
- case STATE_R300_WINDOW_DIMENSION: {
- __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon);
- value[0] = drawable->w * 0.5f; /* width*0.5 */
- value[1] = drawable->h * 0.5f; /* height*0.5 */
- value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
- value[3] = 1.0F; /* not used */
- break;
- }
-
- case STATE_R300_TEXRECT_FACTOR:{
- struct gl_texture_object *t =
- ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX];
-
- if (t && t->Image[0][t->BaseLevel]) {
- struct gl_texture_image *image =
- t->Image[0][t->BaseLevel];
- value[0] = 1.0 / image->Width2;
- value[1] = 1.0 / image->Height2;
- } else {
- value[0] = 1.0;
- value[1] = 1.0;
- }
- value[2] = 1.0;
- value[3] = 1.0;
- break;
- }
-
- default:
- break;
- }
- break;
-
- default:
- break;
- }
-}
-
/**
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
@@ -1101,7 +1054,6 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
- GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
@@ -1109,21 +1061,12 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
- paramList = rmesa->selected_fp->Base->Parameters;
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
if (!paramList)
return;
_mesa_load_state_parameters(ctx, paramList);
-
- for (i = 0; i < paramList->NumParameters; i++) {
- if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
- r300FetchStateParameter(ctx,
- paramList->Parameters[i].
- StateIndexes,
- paramList->ParameterValues[i]);
- }
- }
}
/* =============================================================
@@ -1230,7 +1173,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program_code *code = &r300->selected_fp->code.r300;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1272,7 +1215,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r500_fragment_program_code *code = &r300->selected_fp->code.r500;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -1312,6 +1255,7 @@ static GLuint translate_lod_bias(GLfloat bias)
return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
}
+
static void r300SetupTextures(GLcontext * ctx)
{
int i, mtu;
@@ -1404,6 +1348,28 @@ static void r300SetupTextures(GLcontext * ctx)
}
}
+ /* R3xx and R4xx chips require that the texture unit corresponding to
+ * KIL instructions is really enabled.
+ *
+ * We do some fakery here and in the state atom emit logic to enable
+ * the texture without tripping up the CS checker in the kernel.
+ */
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
+ last_hw_tmu++;
+
+ r300->hw.txe.cmd[R300_TXE