summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/cell/ppu/cell_clear.c6
-rw-r--r--src/gallium/drivers/cell/ppu/cell_draw_arrays.c30
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c278
-rw-r--r--src/gallium/drivers/cell/ppu/cell_pipe_state.c4
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_derived.c8
-rw-r--r--src/gallium/drivers/cell/ppu/cell_state_emit.c9
-rw-r--r--src/gallium/drivers/cell/ppu/cell_texture.c59
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c4
-rw-r--r--src/gallium/drivers/cell/spu/spu_exec.c103
-rw-r--r--src/gallium/drivers/cell/spu/spu_util.c104
-rw-r--r--src/gallium/drivers/failover/fo_context.c42
-rw-r--r--src/gallium/drivers/failover/fo_context.h6
-rw-r--r--src/gallium/drivers/failover/fo_state.c89
-rw-r--r--src/gallium/drivers/failover/fo_state_emit.c14
-rw-r--r--src/gallium/drivers/failover/fo_winsys.h3
-rw-r--r--src/gallium/drivers/i915/i915_buffer.c1
-rw-r--r--src/gallium/drivers/i915/i915_context.c30
-rw-r--r--src/gallium/drivers/i915/i915_debug.c3
-rw-r--r--src/gallium/drivers/i915/i915_fpc_translate.c176
-rw-r--r--src/gallium/drivers/i915/i915_screen.c1
-rw-r--r--src/gallium/drivers/i915/i915_state.c22
-rw-r--r--src/gallium/drivers/i915/i915_state_derived.c10
-rw-r--r--src/gallium/drivers/i915/i915_state_sampler.c2
-rw-r--r--src/gallium/drivers/i915/i915_surface.c22
-rw-r--r--src/gallium/drivers/i915/i915_texture.c196
-rw-r--r--src/gallium/drivers/i915/intel_winsys.h6
-rw-r--r--src/gallium/drivers/i965/Makefile74
-rw-r--r--src/gallium/drivers/i965/SConscript77
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.c202
-rw-r--r--src/gallium/drivers/i965/brw_batchbuffer.h148
-rw-r--r--src/gallium/drivers/i965/brw_cc.c111
-rw-r--r--src/gallium/drivers/i965/brw_clip.c224
-rw-r--r--src/gallium/drivers/i965/brw_clip.h199
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c271
-rw-r--r--src/gallium/drivers/i965/brw_clip_point.c48
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c209
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c595
-rw-r--r--src/gallium/drivers/i965/brw_clip_unfilled.c497
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c388
-rw-r--r--src/gallium/drivers/i965/brw_context.c154
-rw-r--r--src/gallium/drivers/i965/brw_context.h858
-rw-r--r--src/gallium/drivers/i965/brw_curbe.c390
-rw-r--r--src/gallium/drivers/i965/brw_debug.h43
-rw-r--r--src/gallium/drivers/i965/brw_defines.h847
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c922
-rw-r--r--src/gallium/drivers/i965/brw_disasm.h36
-rw-r--r--src/gallium/drivers/i965/brw_draw.c289
-rw-r--r--src/gallium/drivers/i965/brw_draw.h39
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c542
-rw-r--r--src/gallium/drivers/i965/brw_eu.c262
-rw-r--r--src/gallium/drivers/i965/brw_eu.h992
-rw-r--r--src/gallium/drivers/i965/brw_eu_debug.c94
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c1433
-rw-r--r--src/gallium/drivers/i965/brw_eu_util.c126
-rw-r--r--src/gallium/drivers/i965/brw_gs.c216
-rw-r--r--src/gallium/drivers/i965/brw_gs.h76
-rw-r--r--src/gallium/drivers/i965/brw_gs_emit.c181
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c169
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c513
-rw-r--r--src/gallium/drivers/i965/brw_pipe_blend.c208
-rw-r--r--src/gallium/drivers/i965/brw_pipe_clear.c218
-rw-r--r--src/gallium/drivers/i965/brw_pipe_depth.c172
-rw-r--r--src/gallium/drivers/i965/brw_pipe_fb.c84
-rw-r--r--src/gallium/drivers/i965/brw_pipe_flush.c83
-rw-r--r--src/gallium/drivers/i965/brw_pipe_misc.c54
-rw-r--r--src/gallium/drivers/i965/brw_pipe_query.c263
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.c161
-rw-r--r--src/gallium/drivers/i965/brw_pipe_rast.h16
-rw-r--r--src/gallium/drivers/i965/brw_pipe_sampler.c231
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c303
-rw-r--r--src/gallium/drivers/i965/brw_pipe_vertex.c71
-rw-r--r--src/gallium/drivers/i965/brw_reg.h115
-rw-r--r--src/gallium/drivers/i965/brw_screen.c403
-rw-r--r--src/gallium/drivers/i965/brw_screen.h199
-rw-r--r--src/gallium/drivers/i965/brw_screen_buffers.c202
-rw-r--r--src/gallium/drivers/i965/brw_screen_surface.c262
-rw-r--r--src/gallium/drivers/i965/brw_screen_tex_layout.c414
-rw-r--r--src/gallium/drivers/i965/brw_screen_texture.c573
-rw-r--r--src/gallium/drivers/i965/brw_sf.c216
-rw-r--r--src/gallium/drivers/i965/brw_sf.h122
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c765
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c333
-rw-r--r--src/gallium/drivers/i965/brw_state.h174
-rw-r--r--src/gallium/drivers/i965/brw_state_batch.c98
-rw-r--r--src/gallium/drivers/i965/brw_state_cache.c617
-rw-r--r--src/gallium/drivers/i965/brw_state_debug.c153
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c270
-rw-r--r--src/gallium/drivers/i965/brw_structs.h1576
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.c1247
-rw-r--r--src/gallium/drivers/i965/brw_structs_dump.h276
-rwxr-xr-xsrc/gallium/drivers/i965/brw_structs_dump.py291
-rw-r--r--src/gallium/drivers/i965/brw_swtnl.c95
-rw-r--r--src/gallium/drivers/i965/brw_types.h21
-rw-r--r--src/gallium/drivers/i965/brw_urb.c263
-rw-r--r--src/gallium/drivers/i965/brw_util.c38
-rw-r--r--src/gallium/drivers/i965/brw_util.h44
-rw-r--r--src/gallium/drivers/i965/brw_vs.c131
-rw-r--r--src/gallium/drivers/i965/brw_vs.h106
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c1654
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c201
-rw-r--r--src/gallium/drivers/i965/brw_vs_surface_state.c232
-rw-r--r--src/gallium/drivers/i965/brw_winsys.h309
-rw-r--r--src/gallium/drivers/i965/brw_winsys_debug.c87
-rw-r--r--src/gallium/drivers/i965/brw_wm.c319
-rw-r--r--src/gallium/drivers/i965/brw_wm.h344
-rw-r--r--src/gallium/drivers/i965/brw_wm_constant_buffer.c165
-rw-r--r--src/gallium/drivers/i965/brw_wm_debug.c256
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c1521
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c1224
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c2032
-rw-r--r--src/gallium/drivers/i965/brw_wm_iz.c156
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass0.c366
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass1.c292
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass2.c334
-rw-r--r--src/gallium/drivers/i965/brw_wm_sampler_state.c229
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c339
-rw-r--r--src/gallium/drivers/i965/brw_wm_surface_state.c294
-rw-r--r--src/gallium/drivers/i965/intel_decode.c1790
-rw-r--r--src/gallium/drivers/i965/intel_decode.h29
-rw-r--r--src/gallium/drivers/i965/intel_structs.h132
-rw-r--r--src/gallium/drivers/identity/id_context.c120
-rw-r--r--src/gallium/drivers/identity/id_objects.c39
-rw-r--r--src/gallium/drivers/identity/id_objects.h25
-rw-r--r--src/gallium/drivers/identity/id_public.h2
-rw-r--r--src/gallium/drivers/identity/id_screen.c33
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile6
-rw-r--r--src/gallium/drivers/llvmpipe/README36
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c212
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c240
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h74
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c207
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_query.c72
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c114
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.cpp61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.h56
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.c418
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.h95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c374
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c31
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h98
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c62
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_blend.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c81
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c58
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vs.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h22
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c79
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_winsys.h2
-rw-r--r--src/gallium/drivers/nouveau/Makefile3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_context.c41
-rw-r--r--src/gallium/drivers/nouveau/nouveau_context.h11
-rw-r--r--src/gallium/drivers/nouveau/nouveau_push.h93
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c24
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h3
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h287
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h3
-rw-r--r--src/gallium/drivers/nv04/nv04_context.c78
-rw-r--r--src/gallium/drivers/nv04/nv04_context.h9
-rw-r--r--src/gallium/drivers/nv04/nv04_fragtex.c16
-rw-r--r--src/gallium/drivers/nv04/nv04_miptree.c29
-rw-r--r--src/gallium/drivers/nv04/nv04_prim_vbuf.c84
-rw-r--r--src/gallium/drivers/nv04/nv04_screen.c6
-rw-r--r--src/gallium/drivers/nv04/nv04_state.c64
-rw-r--r--src/gallium/drivers/nv04/nv04_state.h1
-rw-r--r--src/gallium/drivers/nv04/nv04_state_emit.c72
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.c96
-rw-r--r--src/gallium/drivers/nv04/nv04_surface_2d.h4
-rw-r--r--src/gallium/drivers/nv04/nv04_transfer.c37
-rw-r--r--src/gallium/drivers/nv04/nv04_vbo.c12
-rw-r--r--src/gallium/drivers/nv10/nv10_context.c400
-rw-r--r--src/gallium/drivers/nv10/nv10_context.h9
-rw-r--r--src/gallium/drivers/nv10/nv10_fragtex.c34
-rw-r--r--src/gallium/drivers/nv10/nv10_miptree.c26
-rw-r--r--src/gallium/drivers/nv10/nv10_prim_vbuf.c34
-rw-r--r--src/gallium/drivers/nv10/nv10_screen.c4
-rw-r--r--src/gallium/drivers/nv10/nv10_state.c4
-rw-r--r--src/gallium/drivers/nv10/nv10_state.h1
-rw-r--r--src/gallium/drivers/nv10/nv10_state_emit.c169
-rw-r--r--src/gallium/drivers/nv10/nv10_transfer.c37
-rw-r--r--src/gallium/drivers/nv10/nv10_vbo.c11
-rw-r--r--src/gallium/drivers/nv20/nv20_context.c564
-rw-r--r--src/gallium/drivers/nv20/nv20_context.h9
-rw-r--r--src/gallium/drivers/nv20/nv20_fragtex.c34
-rw-r--r--src/gallium/drivers/nv20/nv20_miptree.c58
-rw-r--r--src/gallium/drivers/nv20/nv20_prim_vbuf.c56
-rw-r--r--src/gallium/drivers/nv20/nv20_screen.c4
-rw-r--r--src/gallium/drivers/nv20/nv20_state.c4
-rw-r--r--src/gallium/drivers/nv20/nv20_state.h1
-rw-r--r--src/gallium/drivers/nv20/nv20_state_emit.c200
-rw-r--r--src/gallium/drivers/nv20/nv20_transfer.c39
-rw-r--r--src/gallium/drivers/nv20/nv20_vbo.c9
-rw-r--r--src/gallium/drivers/nv20/nv20_vertprog.c71
-rw-r--r--src/gallium/drivers/nv30/nv30_context.c54
-rw-r--r--src/gallium/drivers/nv30/nv30_context.h11
-rw-r--r--src/gallium/drivers/nv30/nv30_fragprog.c178
-rw-r--r--src/gallium/drivers/nv30/nv30_fragtex.c21
-rw-r--r--src/gallium/drivers/nv30/nv30_miptree.c70
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c20
-rw-r--r--src/gallium/drivers/nv30/nv30_screen.c34
-rw-r--r--src/gallium/drivers/nv30/nv30_state.c21
-rw-r--r--src/gallium/drivers/nv30/nv30_state.h1
-rw-r--r--src/gallium/drivers/nv30/nv30_state_blend.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c10
-rw-r--r--src/gallium/drivers/nv30/nv30_state_fb.c64
-rw-r--r--src/gallium/drivers/nv30/nv30_state_scissor.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_state_stipple.c4
-rw-r--r--src/gallium/drivers/nv30/nv30_state_viewport.c2
-rw-r--r--src/gallium/drivers/nv30/nv30_transfer.c39
-rw-r--r--src/gallium/drivers/nv30/nv30_vbo.c133
-rw-r--r--src/gallium/drivers/nv30/nv30_vertprog.c91
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c54
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h13
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c73
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c179
-rw-r--r--src/gallium/drivers/nv40/nv40_fragtex.c11
-rw-r--r--src/gallium/drivers/nv40/nv40_miptree.c68
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c20
-rw-r--r--src/gallium/drivers/nv40/nv40_screen.c10
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c21
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h1
-rw-r--r--src/gallium/drivers/nv40/nv40_state_blend.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c22
-rw-r--r--src/gallium/drivers/nv40/nv40_state_fb.c5
-rw-r--r--src/gallium/drivers/nv40/nv40_state_scissor.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_state_stipple.c4
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_transfer.c39
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c140
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c153
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c73
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h48
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c133
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c2037
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h6
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c173
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h2
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c109
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c168
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c297
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h16
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c178
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c552
-rw-r--r--src/gallium/drivers/r300/Makefile10
-rw-r--r--src/gallium/drivers/r300/SConscript12
-rw-r--r--src/gallium/drivers/r300/r300_blit.c130
-rw-r--r--src/gallium/drivers/r300/r300_blit.h (renamed from src/gallium/drivers/r300/r300_clear.h)26
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c3
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h6
-rw-r--r--src/gallium/drivers/r300/r300_clear.c35
-rw-r--r--src/gallium/drivers/r300/r300_context.c185
-rw-r--r--src/gallium/drivers/r300/r300_context.h108
-rw-r--r--src/gallium/drivers/r300/r300_cs.h20
-rw-r--r--src/gallium/drivers/r300/r300_debug.c8
-rw-r--r--src/gallium/drivers/r300/r300_emit.c675
-rw-r--r--src/gallium/drivers/r300/r300_emit.h32
-rw-r--r--src/gallium/drivers/r300/r300_flush.c27
-rw-r--r--src/gallium/drivers/r300/r300_flush.h7
-rw-r--r--src/gallium/drivers/r300/r300_fs.c207
-rw-r--r--src/gallium/drivers/r300/r300_fs.h42
-rw-r--r--src/gallium/drivers/r300/r300_query.c73
-rw-r--r--src/gallium/drivers/r300/r300_query.h4
-rw-r--r--src/gallium/drivers/r300/r300_reg.h163
-rw-r--r--src/gallium/drivers/r300/r300_render.c432
-rw-r--r--src/gallium/drivers/r300/r300_render.h59
-rw-r--r--src/gallium/drivers/r300/r300_screen.c168
-rw-r--r--src/gallium/drivers/r300/r300_screen.h9
-rw-r--r--src/gallium/drivers/r300/r300_shader_inlines.h47
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h66
-rw-r--r--src/gallium/drivers/r300/r300_state.c592
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c782
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h4
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h209
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c34
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.h6
-rw-r--r--src/gallium/drivers/r300/r300_surface.c372
-rw-r--r--src/gallium/drivers/r300/r300_surface.h124
-rw-r--r--src/gallium/drivers/r300/r300_texture.c189
-rw-r--r--src/gallium/drivers/r300/r300_texture.h40
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c56
-rw-r--r--src/gallium/drivers/r300/r300_vs.c430
-rw-r--r--src/gallium/drivers/r300/r300_vs.h20
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h61
-rw-r--r--src/gallium/drivers/r300/r3xx_fs.c74
-rw-r--r--src/gallium/drivers/r300/r3xx_fs.h32
-rw-r--r--src/gallium/drivers/r300/r5xx_fs.c125
-rw-r--r--src/gallium/drivers/r300/r5xx_fs.h32
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c11
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c69
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h13
-rw-r--r--src/gallium/drivers/softpipe/sp_draw_arrays.c53
-rw-r--r--src/gallium/drivers/softpipe/sp_flush.c3
-rw-r--r--src/gallium/drivers/softpipe/sp_fs_exec.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_prim_vbuf.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_blend.c20
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c26
-rw-r--r--src/gallium/drivers/softpipe/sp_query.h4
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c48
-rw-r--r--src/gallium/drivers/softpipe/sp_state.h37
-rw-r--r--src/gallium/drivers/softpipe/sp_state_blend.c5
-rw-r--r--src/gallium/drivers/softpipe/sp_state_derived.c51
-rw-r--r--src/gallium/drivers/softpipe/sp_state_fs.c76
-rw-r--r--src/gallium/drivers/softpipe/sp_state_rasterizer.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_state_sampler.c69
-rw-r--r--src/gallium/drivers/softpipe/sp_state_surface.c9
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c253
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.h7
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_tile_cache.c7
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c76
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c18
-rw-r--r--src/gallium/drivers/softpipe/sp_winsys.h4
-rw-r--r--src/gallium/drivers/svga/Makefile60
-rw-r--r--src/gallium/drivers/svga/SConscript72
-rw-r--r--src/gallium/drivers/svga/include/README3
-rw-r--r--src/gallium/drivers/svga/include/svga3d_caps.h139
-rw-r--r--src/gallium/drivers/svga/include/svga3d_reg.h1793
-rw-r--r--src/gallium/drivers/svga/include/svga3d_shaderdefs.h519
-rw-r--r--src/gallium/drivers/svga/include/svga_escape.h89
-rw-r--r--src/gallium/drivers/svga/include/svga_overlay.h201
-rw-r--r--src/gallium/drivers/svga/include/svga_reg.h1346
-rw-r--r--src/gallium/drivers/svga/include/svga_types.h46
-rw-r--r--src/gallium/drivers/svga/svga_cmd.c1427
-rw-r--r--src/gallium/drivers/svga/svga_cmd.h235
-rw-r--r--src/gallium/drivers/svga/svga_context.c287
-rw-r--r--src/gallium/drivers/svga/svga_context.h446
-rw-r--r--src/gallium/drivers/svga/svga_debug.h75
-rw-r--r--src/gallium/drivers/svga/svga_draw.c378
-rw-r--r--src/gallium/drivers/svga/svga_draw.h83
-rw-r--r--src/gallium/drivers/svga/svga_draw_arrays.c297
-rw-r--r--src/gallium/drivers/svga/svga_draw_elements.c255
-rw-r--r--src/gallium/drivers/svga/svga_draw_private.h158
-rw-r--r--src/gallium/drivers/svga/svga_hw_reg.h42
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blend.c246
-rw-r--r--src/gallium/drivers/svga/svga_pipe_blit.c92
-rw-r--r--src/gallium/drivers/svga/svga_pipe_clear.c125
-rw-r--r--src/gallium/drivers/svga/svga_pipe_constants.c74
-rw-r--r--src/gallium/drivers/svga/svga_pipe_depthstencil.c153
-rw-r--r--src/gallium/drivers/svga/svga_pipe_draw.c259
-rw-r--r--src/gallium/drivers/svga/svga_pipe_flush.c71
-rw-r--r--src/gallium/drivers/svga/svga_pipe_fs.c134
-rw-r--r--src/gallium/drivers/svga/svga_pipe_misc.c187
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c267
-rw-r--r--src/gallium/drivers/svga/svga_pipe_rasterizer.c250
-rw-r--r--src/gallium/drivers/svga/svga_pipe_sampler.c245
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vertex.c102
-rw-r--r--src/gallium/drivers/svga/svga_pipe_vs.c199
-rw-r--r--src/gallium/drivers/svga/svga_screen.c440
-rw-r--r--src/gallium/drivers/svga/svga_screen.h95
-rw-r--r--src/gallium/drivers/svga/svga_screen_buffer.c825
-rw-r--r--src/gallium/drivers/svga/svga_screen_buffer.h190
-rw-r--r--src/gallium/drivers/svga/svga_screen_cache.c347
-rw-r--r--src/gallium/drivers/svga/svga_screen_cache.h144
-rw-r--r--src/gallium/drivers/svga/svga_screen_texture.c1090
-rw-r--r--src/gallium/drivers/svga/svga_screen_texture.h195
-rw-r--r--src/gallium/drivers/svga/svga_state.c278
-rw-r--r--src/gallium/drivers/svga/svga_state.h95
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c240
-rw-r--r--src/gallium/drivers/svga/svga_state_framebuffer.c458
-rw-r--r--src/gallium/drivers/svga/svga_state_fs.c302
-rw-r--r--src/gallium/drivers/svga/svga_state_need_swtnl.c200
-rw-r--r--src/gallium/drivers/svga/svga_state_rss.c268
-rw-r--r--src/gallium/drivers/svga/svga_state_tss.c279
-rw-r--r--src/gallium/drivers/svga/svga_state_vdecl.c182
-rw-r--r--src/gallium/drivers/svga/svga_state_vs.c246
-rw-r--r--src/gallium/drivers/svga/svga_swtnl.h52
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_backend.c349
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_draw.c170
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_private.h93
-rw-r--r--src/gallium/drivers/svga/svga_swtnl_state.c237
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.c282
-rw-r--r--src/gallium/drivers/svga/svga_tgsi.h136
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_decl_sm20.c280
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_decl_sm30.c385
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h345
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2716
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h299
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.c1784
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_dump.h37
-rwxr-xr-xsrc/gallium/drivers/svga/svgadump/svga_dump.py340
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader.h220
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_dump.c654
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_dump.h42
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_op.c168
-rw-r--r--src/gallium/drivers/svga/svgadump/svga_shader_op.h46
-rw-r--r--src/gallium/drivers/trace/README5
-rw-r--r--src/gallium/drivers/trace/tr_context.c199
-rw-r--r--src/gallium/drivers/trace/tr_context.h3
-rw-r--r--src/gallium/drivers/trace/tr_drm.c4
-rw-r--r--src/gallium/drivers/trace/tr_dump.c4
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.c32
-rw-r--r--src/gallium/drivers/trace/tr_dump_state.h3
-rw-r--r--src/gallium/drivers/trace/tr_rbug.c26
-rw-r--r--src/gallium/drivers/trace/tr_screen.c7
-rw-r--r--src/gallium/drivers/trace/tr_state.h2
428 files changed, 71543 insertions, 7368 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index 79ad687ea94..3a3f968a492 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -59,9 +59,9 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
if (buffers & PIPE_CLEAR_COLOR) {
uint surfIndex = 0;
- uint clearValue;
+ union util_color uc;
- util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &clearValue);
+ util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &uc);
/* Build a CLEAR command and place it in the current batch buffer */
STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
@@ -70,7 +70,7 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
cell_batch_alloc16(cell, sizeof(*clr));
clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
clr->surface = surfIndex;
- clr->value = clearValue;
+ clr->value = uc.ui;
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 644496db40c..3fa8b975d39 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp)
}
}
- draw_set_mapped_constant_buffer(sp->draw,
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
sp->constants[PIPE_SHADER_VERTEX].buffer->size);
}
@@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp)
*
* XXX should the element buffer be specified/bound with a separate function?
*/
-static boolean
+static void
cell_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -145,47 +145,35 @@ cell_draw_range_elements(struct pipe_context *pipe,
/* Note: leave drawing surfaces mapped */
cell_unmap_constant_buffers(sp);
-
- return TRUE;
}
-static boolean
+static void
cell_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return cell_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ cell_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
-static boolean
+static void
cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return cell_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
-
-static void
-cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct cell_context *cell = cell_context(pipe);
- draw_set_edgeflags(cell->draw, edgeflags);
+ cell_draw_elements(pipe, NULL, 0, mode, start, count);
}
-
void
cell_init_draw_functions(struct cell_context *cell)
{
cell->pipe.draw_arrays = cell_draw_arrays;
cell->pipe.draw_elements = cell_draw_elements;
cell->pipe.draw_range_elements = cell_draw_range_elements;
- cell->pipe.set_edgeflags = cell_set_edgeflags;
}
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 312621fd534..1d8a11a4ac9 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -231,14 +231,14 @@ static boolean
is_register_src(struct codegen *gen, int channel,
const struct tgsi_full_src_register *src)
{
- int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
return FALSE;
}
- if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
- src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ if (src->Register.File == TGSI_FILE_TEMPORARY ||
+ src->Register.File == TGSI_FILE_IMMEDIATE) {
return TRUE;
}
return FALSE;
@@ -249,7 +249,7 @@ static boolean
is_memory_dst(struct codegen *gen, int channel,
const struct tgsi_full_dst_register *dst)
{
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ if (dst->Register.File == TGSI_FILE_OUTPUT) {
return TRUE;
}
else {
@@ -271,32 +271,23 @@ get_src_reg(struct codegen *gen,
const struct tgsi_full_src_register *src)
{
int reg = -1;
- int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+ int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
boolean reg_is_itemp = FALSE;
uint sign_op;
assert(swizzle >= TGSI_SWIZZLE_X);
- assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
+ assert(swizzle <= TGSI_SWIZZLE_W);
- if (swizzle == TGSI_EXTSWIZZLE_ONE) {
- /* Load const one float and early out */
- reg = get_const_one_reg(gen);
- }
- else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
- /* Load const zero float and early out */
- reg = get_itemp(gen);
- spe_xor(gen->f, reg, reg, reg);
- }
- else {
- int index = src->SrcRegister.Index;
+ {
+ int index = src->Register.Index;
assert(swizzle < 4);
- if (src->SrcRegister.Indirect) {
+ if (src->Register.Indirect) {
/* XXX unfinished */
}
- switch (src->SrcRegister.File) {
+ switch (src->Register.File) {
case TGSI_FILE_TEMPORARY:
reg = gen->temp_regs[index][swizzle];
break;
@@ -383,12 +374,12 @@ get_dst_reg(struct codegen *gen,
{
int reg = -1;
- switch (dest->DstRegister.File) {
+ switch (dest->Register.File) {
case TGSI_FILE_TEMPORARY:
if (gen->if_nesting > 0 || gen->loop_nesting > 0)
reg = get_itemp(gen);
else
- reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ reg = gen->temp_regs[dest->Register.Index][channel];
break;
case TGSI_FILE_OUTPUT:
reg = get_itemp(gen);
@@ -428,10 +419,10 @@ store_dest_reg(struct codegen *gen,
}
#endif
- switch (dest->DstRegister.File) {
+ switch (dest->Register.File) {
case TGSI_FILE_TEMPORARY:
if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
- int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
+ int d_reg = gen->temp_regs[dest->Register.Index][channel];
int exec_reg = get_exec_mask_reg(gen);
/* Mix d with new value according to exec mask:
* d[i] = mask_reg[i] ? value_reg : d_reg
@@ -446,7 +437,7 @@ store_dest_reg(struct codegen *gen,
case TGSI_FILE_OUTPUT:
{
/* offset is measured in quadwords, not bytes */
- int offset = dest->DstRegister.Index * 4 + channel;
+ int offset = dest->Register.Index * 4 + channel;
if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
int exec_reg = get_exec_mask_reg(gen);
int curval_reg = get_itemp(gen);
@@ -553,7 +544,7 @@ emit_epilogue(struct codegen *gen)
#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
for (ch = 0; ch < 4; ch++) \
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch))
+ if (inst->Dst[0].Register.WriteMask & (1 << ch))
static boolean
@@ -561,7 +552,7 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch = 0, src_reg, addr_reg;
- src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ src_reg = get_src_reg(gen, ch, &inst->Src[0]);
addr_reg = get_address_reg(gen);
/* convert float to int */
@@ -579,19 +570,19 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, src_reg[4], dst_reg[4];
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
- is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
+ if (is_register_src(gen, ch, &inst->Src[0]) &&
+ is_memory_dst(gen, ch, &inst->Dst[0])) {
/* special-case: register to memory store */
- store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
}
else {
spe_move(gen->f, dst_reg[ch], src_reg[ch]);
- store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
}
}
@@ -610,9 +601,9 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
/* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
@@ -635,7 +626,7 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
/* Free any intermediate temps we allocated */
@@ -654,16 +645,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
return TRUE;
@@ -674,16 +665,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
* Emit linear interpolate. See emit_ADD for comments.
*/
static boolean
-emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
/* setup/get src/dst/temp regs */
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
tmp_reg[ch] = get_itemp(gen);
}
@@ -696,7 +687,7 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
return TRUE;
@@ -713,8 +704,8 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s1_reg[4], d_reg[4], tmp_reg[4];
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
tmp_reg[ch] = get_itemp(gen);
}
@@ -735,7 +726,7 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -756,8 +747,8 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
/* d = sign bit cleared in s1 */
@@ -766,7 +757,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -784,12 +775,12 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s2x_reg, s2y_reg, s2z_reg;
int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
- s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
- s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
- s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
- s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+ s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
/* t0 = x0 * x1 */
spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
@@ -804,9 +795,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -824,14 +815,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
- s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
- s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
- s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
- s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
- s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
- s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+ s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
+ s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
+ s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+ s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
+ s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
+ s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
/* t0 = x0 * x1 */
spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
@@ -849,9 +840,9 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -866,31 +857,31 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
/* XXX rewrite this function to look more like DP3/DP4 */
int ch;
- int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
int tmp_reg = get_itemp(gen);
/* t = x0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
/* t = y0 * y1 + t */
spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
- s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
/* t = z0 * z1 + t */
spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
- s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+ s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
/* t = w1 + t */
spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
spe_move(gen->f, d_reg, tmp_reg);
- store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -907,9 +898,9 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
int src_reg[3];
int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
- src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
/* t0 = x * x */
spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
@@ -928,10 +919,10 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
/* dst = src[ch] * t1 */
spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -945,48 +936,48 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+ int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
int tmp_reg = get_itemp(gen);
/* t = z0 * y1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
/* t = y0 * z1 - t */
spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
- store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
+ if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) {
+ store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
}
- s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
/* t = x0 * z1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
- s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
/* t = z0 * x1 - t */
spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
- store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
+ if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
}
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
/* t = y0 * x1 */
spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
- s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
- s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
/* t = x0 * y1 - t */
spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
- store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
+ if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1004,14 +995,14 @@ static boolean
emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
- bool complement = FALSE;
+ boolean complement = FALSE;
one_reg = get_const_one_reg(gen);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
@@ -1052,7 +1043,7 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1069,10 +1060,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch;
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
+ int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
int zero_reg = get_itemp(gen);
spe_zero(gen->f, zero_reg);
@@ -1081,7 +1072,7 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
free_itemps(gen);
}
@@ -1099,8 +1090,8 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s1_reg[4], d_reg[4];
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
/* Convert float to int */
@@ -1114,7 +1105,7 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1138,8 +1129,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
one_reg = get_const_one_reg(gen);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
tmp_reg[ch] = get_itemp(gen);
}
@@ -1165,7 +1156,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1186,8 +1177,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
one_reg = get_const_one_reg(gen);
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
tmp_reg[ch] = get_itemp(gen);
}
@@ -1219,7 +1210,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* store result */
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1281,7 +1272,7 @@ emit_function_call(struct codegen *gen,
if (scalar) {
for (a = 0; a < num_args; a++) {
- s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
+ s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
}
/* we'll call the function, put the return value in this register,
* then replicate it across all write-enabled components in d_reg.
@@ -1296,11 +1287,11 @@ emit_function_call(struct codegen *gen,
if (!scalar) {
for (a = 0; a < num_args; a++) {
- s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
+ s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
}
}
- d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
if (!scalar || !func_called) {
/* for a scalar function, we'll really only call the function once */
@@ -1345,7 +1336,7 @@ emit_function_call(struct codegen *gen,
spe_move(gen->f, d_reg, retval_reg);
}
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
free_itemps(gen);
}
@@ -1360,8 +1351,8 @@ emit_function_call(struct codegen *gen,
static boolean
emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- const uint target = inst->InstructionExtTexture.Texture;
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ const uint target = inst->Texture.Texture;
+ const uint unit = inst->Src[1].Register.Index;
uint addr;
int ch;
int coord_regs[4], d_regs[4];
@@ -1382,14 +1373,14 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
return FALSE;
}
- assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+ assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
spe_comment(gen->f, -4, "CALL tex:");
/* get src/dst reg info */
for (ch = 0; ch < 4; ch++) {
- coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
}
{
@@ -1434,7 +1425,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
free_itemps(gen);
}
@@ -1461,7 +1452,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* get src regs */
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
}
/* test if any src regs are < 0 */
@@ -1509,9 +1500,9 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
tmp_reg[ch] = get_itemp(gen);
}
@@ -1527,7 +1518,7 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
}
free_itemps(gen);
@@ -1584,7 +1575,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* update conditional execution mask with the predicate register */
int tmp_reg = get_itemp(gen);
- int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
+ int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
/* tmp = (s1_reg == 0) */
spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
@@ -1708,8 +1699,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
int ch;
FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
int t1_reg = get_itemp(gen);
int t2_reg = get_itemp(gen);
@@ -1758,7 +1749,6 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_ARL:
return emit_ARL(gen, inst);
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
return emit_MOV(gen, inst);
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_SUB:
@@ -1766,7 +1756,7 @@ emit_instruction(struct codegen *gen,
return emit_binop(gen, inst);
case TGSI_OPCODE_MAD:
return emit_MAD(gen, inst);
- case TGSI_OPCODE_LERP:
+ case TGSI_OPCODE_LRP:
return emit_LRP(gen, inst);
case TGSI_OPCODE_DP3:
return emit_DP3(gen, inst);
@@ -1919,8 +1909,8 @@ emit_declaration(struct cell_context *cell,
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- for (i = decl->DeclarationRange.First;
- i <= decl->DeclarationRange.Last;
+ for (i = decl->Range.First;
+ i <= decl->Range.Last;
i++) {
assert(i < MAX_TEMPS);
for (ch = 0; ch < 4; ch++) {
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index ccd0fef6e84..c18a5d0635e 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -383,10 +383,10 @@ cell_init_state_functions(struct cell_context *cell)
cell->pipe.delete_blend_state = cell_delete_blend_state;
cell->pipe.create_sampler_state = cell_create_sampler_state;
- cell->pipe.bind_sampler_states = cell_bind_sampler_states;
+ cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states;
cell->pipe.delete_sampler_state = cell_delete_sampler_state;
- cell->pipe.set_sampler_textures = cell_set_sampler_textures;
+ cell->pipe.set_fragment_sampler_textures = cell_set_sampler_textures;
cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state;
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index efc4f78364b..b723e794e71 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
vinfo->num_attribs = 0;
/* we always want to emit vertex pos */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
@@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
break;
case TGSI_SEMANTIC_COLOR:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
break;
case TGSI_SEMANTIC_FOG:
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
#if 1
if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
src = 0;
@@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
case TGSI_SEMANTIC_GENERIC:
/* this includes texcoords and varying vars */
- src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
+ src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
fs->info.input_semantic_index[i]);
assert(src >= 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 9479c0898fd..5b87286d4c5 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -27,6 +27,7 @@
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "cell_context.h"
#include "cell_gen_fragment.h"
#include "cell_state.h"
@@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell)
for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
texture->start[level] = (ct->mapped +
ct->level_offset[level]);
- texture->width[level] = ct->base.width[level];
- texture->height[level] = ct->base.height[level];
- texture->depth[level] = ct->base.depth[level];
+ texture->width[level] = u_minify(ct->base.width0, level);
+ texture->height[level] = u_minify(ct->base.height0, level);
+ texture->depth[level] = u_minify(ct->base.depth0, level);
}
texture->target = ct->base.target;
}
@@ -330,7 +331,7 @@ cell_emit_state(struct cell_context *cell)
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
- info.num_outputs = draw_num_vs_outputs(draw);
+ info.num_outputs = draw_num_shader_outputs(draw);
info.declarations = (uintptr_t) draw->vs.machine.Declarations;
info.num_declarations = draw->vs.machine.NumDeclarations;
info.instructions = (uintptr_t) draw->vs.machine.Instructions;
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index ae4c61efb3b..998944f77a3 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -35,6 +35,8 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "pipe/internal/p_winsys_screen.h"
+
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -49,9 +51,9 @@ cell_texture_layout(struct cell_texture *ct)
{
struct pipe_texture *pt = &ct->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
ct->buffer_size = 0;
@@ -65,17 +67,11 @@ cell_texture_layout(struct cell_texture *ct)
w_tile = align(width, TILE_SIZE);
h_tile = align(height, TILE_SIZE);
- pt->width[level] = width;
- pt->height[level] = height;
- pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);
-
- ct->stride[level] = pt->nblocksx[level] * pt->block.size;
+ ct->stride[level] = util_format_get_stride(pt->format, w_tile);
ct->level_offset[level] = ct->buffer_size;
- size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+ size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile);
if (pt->target == PIPE_TEXTURE_CUBE)
size *= 6;
else
@@ -83,9 +79,9 @@ cell_texture_layout(struct cell_texture *ct)
ct->buffer_size += size;
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
}
}
@@ -276,8 +272,8 @@ cell_get_tex_surface(struct pipe_screen *screen,
pipe_reference_init(&ps->reference, 1);
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->offset = ct->level_offset[level];
/* XXX may need to override usage flags (see sp_texture.c) */
ps->usage = usage;
@@ -286,10 +282,12 @@ cell_get_tex_surface(struct pipe_screen *screen,
ps->zslice = zslice;
if (pt->target == PIPE_TEXTURE_CUBE) {
- ps->offset += face * pt->nblocksy[level] * ct->stride[level];
+ unsigned h_tile = align(ps->height, TILE_SIZE);
+ ps->offset += face * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
}
else if (pt->target == PIPE_TEXTURE_3D) {
- ps->offset += zslice * pt->nblocksy[level] * ct->stride[level];
+ unsigned h_tile = align(ps->height, TILE_SIZE);
+ ps->offset += zslice * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
}
else {
assert(face == 0);
@@ -330,14 +328,10 @@ cell_get_tex_transfer(struct pipe_screen *screen,
if (ctrans) {
struct pipe_transfer *pt = &ctrans->base;
pipe_texture_reference(&pt->texture, texture);
- pt->format = texture->format;
- pt->block = texture->block;
pt->x = x;
pt->y = y;
pt->width = w;
pt->height = h;
- pt->nblocksx = texture->nblocksx[level];
- pt->nblocksy = texture->nblocksy[level];
pt->stride = ct->stride[level];
pt->usage = usage;
pt->face = face;
@@ -347,10 +341,12 @@ cell_get_tex_transfer(struct pipe_screen *screen,
ctrans->offset = ct->level_offset[level];
if (texture->target == PIPE_TEXTURE_CUBE) {
- ctrans->offset += face * pt->nblocksy * pt->stride;
+ unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
+ ctrans->offset += face * util_format_get_nblocksy(texture->format, h_tile) * pt->stride;
}
else if (texture->target == PIPE_TEXTURE_3D) {
- ctrans->offset += zslice * pt->nblocksy * pt->stride;
+ unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
+ ctrans->offset += zslice * util_format_get_nblocksy(texture->format, h_tile) * pt->stride;
}
else {
assert(face == 0);
@@ -386,8 +382,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
struct pipe_texture *pt = transfer->texture;
struct cell_texture *ct = cell_texture(pt);
const uint level = ctrans->base.level;
- const uint texWidth = pt->width[level];
- const uint texHeight = pt->height[level];
+ const uint texWidth = u_minify(pt->width0, level);
+ const uint texHeight = u_minify(pt->height0, level);
const uint stride = ct->stride[level];
unsigned size;
@@ -403,7 +399,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
* Create a buffer of ordinary memory for the linear texture.
* This is the memory that the user will read/write.
*/
- size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+ size = util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
+ util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE));
ctrans->map = align_malloc(size, 16);
if (!ctrans->map)
@@ -411,7 +408,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
if (transfer->usage & PIPE_TRANSFER_READ) {
/* need to untwiddle the texture to make a linear version */
- const uint bpp = pf_get_size(ct->base.format);
+ const uint bpp = util_format_get_blocksize(ct->base.format);
if (bpp == 4) {
const uint *src = (uint *) (ct->mapped + ctrans->offset);
uint *dst = ctrans->map;
@@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen,
struct pipe_texture *pt = transfer->texture;
struct cell_texture *ct = cell_texture(pt);
const uint level = ctrans->base.level;
- const uint texWidth = pt->width[level];
- const uint texHeight = pt->height[level];
+ const uint texWidth = u_minify(pt->width0, level);
+ const uint texHeight = u_minify(pt->height0, level);
const uint stride = ct->stride[level];
if (!ct->mapped) {
@@ -454,7 +451,7 @@ cell_transfer_unmap(struct pipe_screen *screen,
/* The user wrote new texture data into the mapped buffer.
* We need to convert the new linear data into the twiddled/tiled format.
*/
- const uint bpp = pf_get_size(ct->base.format);
+ const uint bpp = util_format_get_blocksize(ct->base.format);
if (bpp == 4) {
const uint *src = ctrans->map;
uint *dst = (uint *) (ct->mapped + ctrans->offset);
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index 5c0179d9546..12b855a3db2 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -405,8 +405,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
case PIPE_TEX_FILTER_LINEAR:
spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
- case PIPE_TEX_FILTER_ANISO:
- /* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
@@ -418,8 +416,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
case PIPE_TEX_FILTER_LINEAR:
spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
break;
- case PIPE_TEX_FILTER_ANISO:
- /* fall-through, for now */
case PIPE_TEX_FILTER_NEAREST:
spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
break;
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 0eaae2e451b..d86d8e09a51 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -108,10 +108,10 @@
for (CHAN = 0; CHAN < 4; CHAN++)
#define IS_CHANNEL_ENABLED(INST, CHAN)\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+ ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
#define IS_CHANNEL_ENABLED2(INST, CHAN)\
- ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+ ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
FOR_EACH_CHANNEL( CHAN )\
@@ -346,10 +346,10 @@ fetch_src_file_channel(
union spu_exec_channel *chan )
{
switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
switch( file ) {
case TGSI_FILE_CONSTANT: {
unsigned i;
@@ -413,14 +413,6 @@ fetch_src_file_channel(
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
- break;
-
default:
ASSERT( 0 );
}
@@ -439,22 +431,22 @@ fetch_source(
index.i[0] =
index.i[1] =
index.i[2] =
- index.i[3] = reg->SrcRegister.Index;
+ index.i[3] = reg->Register.Index;
- if (reg->SrcRegister.Indirect) {
+ if (reg->Register.Indirect) {
union spu_exec_channel index2;
union spu_exec_channel indir_index;
index2.i[0] =
index2.i[1] =
index2.i[2] =
- index2.i[3] = reg->SrcRegisterInd.Index;
+ index2.i[3] = reg->Indirect.Index;
- swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
+ swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
CHAN_X);
fetch_src_file_channel(
mach,
- reg->SrcRegisterInd.File,
+ reg->Indirect.File,
swizzle,
&index2,
&indir_index );
@@ -462,8 +454,8 @@ fetch_source(
index.q = si_a(index.q, indir_index.q);
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ if( reg->Register.Dimension ) {
+ switch( reg->Register.File ) {
case TGSI_FILE_INPUT:
index.q = si_mpyi(index.q, 17);
break;
@@ -474,24 +466,24 @@ fetch_source(
ASSERT( 0 );
}
- index.i[0] += reg->SrcRegisterDim.Index;
- index.i[1] += reg->SrcRegisterDim.Index;
- index.i[2] += reg->SrcRegisterDim.Index;
- index.i[3] += reg->SrcRegisterDim.Index;
+ index.i[0] += reg->Dimension.Index;
+ index.i[1] += reg->Dimension.Index;
+ index.i[2] += reg->Dimension.Index;
+ index.i[3] += reg->Dimension.Index;
- if (reg->SrcRegisterDim.Indirect) {
+ if (reg->Dimension.Indirect) {
union spu_exec_channel index2;
union spu_exec_channel indir_index;
index2.i[0] =
index2.i[1] =
index2.i[2] =
- index2.i[3] = reg->SrcRegisterDimInd.Index;
+ index2.i[3] = reg->DimIndirect.Index;
- swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+ swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
fetch_src_file_channel(
mach,
- reg->SrcRegisterDimInd.File,
+ reg->DimIndirect.File,
swizzle,
&index2,
&indir_index );
@@ -500,10 +492,10 @@ fetch_source(
}
}
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
fetch_src_file_channel(
mach,
- reg->SrcRegister.File,
+ reg->Register.File,
swizzle,
&index,
chan );
@@ -525,7 +517,7 @@ fetch_source(
break;
}
- if (reg->SrcRegisterExtMod.Complement) {
+ if (reg->RegisterExtMod.Complement) {
chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
}
}
@@ -540,21 +532,21 @@ store_dest(
{
union spu_exec_channel *dst;
- switch( reg->DstRegister.File ) {
+ switch( reg->Register.File ) {
case TGSI_FILE_NULL:
return;
case TGSI_FILE_OUTPUT:
dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
+ + reg->Register.Index].xyzw[chan_index];
break;
case TGSI_FILE_TEMPORARY:
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
break;
case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
break;
default:
@@ -591,10 +583,10 @@ store_dest(
}
#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+ fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+ store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
/**
@@ -610,10 +602,8 @@ exec_kil(struct spu_exec_machine *mach,
uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
union spu_exec_channel r[1];
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+ /* This mask stores component bits that were already tested. */
+ uniquemask = 0;
for (chan_index = 0; chan_index < 4; chan_index++)
{
@@ -621,8 +611,8 @@ exec_kil(struct spu_exec_machine *mach,
uint i;
/* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle (
- &inst->FullSrcRegisters[0],
+ swizzle = tgsi_util_get_full_src_register_swizzle (
+ &inst->Src[0],
chan_index);
/* check if the component has not been already tested */
@@ -687,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach,
const struct tgsi_full_instruction *inst,
boolean biasLod, boolean projected)
{
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ const uint unit = inst->Src[1].Register.Index;
union spu_exec_channel r[8];
uint chan_index;
float lodBias;
@@ -843,8 +833,8 @@ exec_declaration(struct spu_exec_machine *mach,
unsigned first, last, mask;
interpolation_func interp;
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
+ first = decl->Range.First;
+ last = decl->Range.Last;
mask = decl->Declaration.UsageMask;
switch( decl->Declaration.Interpolate ) {
@@ -909,7 +899,6 @@ exec_instruction(
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
STORE( &r[0], 0, chan_index );
@@ -1692,7 +1681,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
@@ -1807,22 +1796,6 @@ exec_instruction(
/* no-op */
break;
- case TGSI_OPCODE_NOISE1:
- ASSERT( 0 );
- break;
-
- case TGSI_OPCODE_NOISE2:
- ASSERT( 0 );
- break;
-
- case TGSI_OPCODE_NOISE3:
- ASSERT( 0 );
- break;
-
- case TGSI_OPCODE_NOISE4:
- ASSERT( 0 );
- break;
-
case TGSI_OPCODE_NOP:
break;
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index af25dd3718a..24057e29e36 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -26,104 +26,17 @@ tgsi_util_get_src_register_swizzle(
return 0;
}
-unsigned
-tgsi_util_get_src_register_extswizzle(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->ExtSwizzleX;
- case 1:
- return reg->ExtSwizzleY;
- case 2:
- return reg->ExtSwizzleZ;
- case 3:
- return reg->ExtSwizzleW;
- default:
- ASSERT( 0 );
- }
- return 0;
-}
unsigned
-tgsi_util_get_full_src_register_extswizzle(
+tgsi_util_get_full_src_register_swizzle(
const struct tgsi_full_src_register *reg,
unsigned component )
{
- unsigned swizzle;
-
- /*
- * First, calculate the extended swizzle for a given channel. This will give
- * us either a channel index into the simple swizzle or a constant 1 or 0.
- */
- swizzle = tgsi_util_get_src_register_extswizzle(
- &reg->SrcRegisterExtSwz,
+ return tgsi_util_get_src_register_swizzle(
+ reg->Register,
component );
-
- ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
- ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
- ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
- ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
- ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
- ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
-
- /*
- * Second, calculate the simple swizzle for the unswizzled channel index.
- * Leave the constants intact, they are not affected by the simple swizzle.
- */
- if( swizzle <= TGSI_SWIZZLE_W ) {
- swizzle = tgsi_util_get_src_register_swizzle(
- &reg->SrcRegister,
- component );
- }
-
- return swizzle;
}
-unsigned
-tgsi_util_get_src_register_extnegate(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->NegateX;
- case 1:
- return reg->NegateY;
- case 2:
- return reg->NegateZ;
- case 3:
- return reg->NegateW;
- default:
- ASSERT( 0 );
- }
- return 0;
-}
-
-void
-tgsi_util_set_src_register_extnegate(
- struct tgsi_src_register_ext_swz *reg,
- unsigned negate,
- unsigned component )
-{
- switch( component ) {
- case 0:
- reg->NegateX = negate;
- break;
- case 1:
- reg->NegateY = negate;
- break;
- case 2:
- reg->NegateZ = negate;
- break;
- case 3:
- reg->NegateW = negate;
- break;
- default:
- ASSERT( 0 );
- }
-}
unsigned
tgsi_util_get_full_src_register_sign_mode(
@@ -132,10 +45,10 @@ tgsi_util_get_full_src_register_sign_mode(
{
unsigned sign_mode;
- if( reg->SrcRegisterExtMod.Absolute ) {
+ if( reg->RegisterExtMod.Absolute ) {
/* Consider only the post-abs negation. */
- if( reg->SrcRegisterExtMod.Negate ) {
+ if( reg->RegisterExtMod.Negate ) {
sign_mode = TGSI_UTIL_SIGN_SET;
}
else {
@@ -147,11 +60,8 @@ tgsi_util_get_full_src_register_sign_mode(
unsigned negate;
- negate = reg->SrcRegister.Negate;
- if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
- negate = !negate;
- }
- if( reg->SrcRegisterExtMod.Negate ) {
+ negate = reg->Register.Negate;
+ if( reg->RegisterExtMod.Negate ) {
negate = !negate;
}
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 37184eac7b1..46e4338d98a 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe )
}
+void failover_fail_over( struct failover_context *failover )
+{
+ failover->dirty = TRUE;
+ failover->mode = FO_SW;
+}
+
-static boolean failover_draw_elements( struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned prim, unsigned start, unsigned count)
+static void failover_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
{
struct failover_context *failover = failover_context( pipe );
@@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
/* Try hardware:
*/
if (failover->mode == FO_HW) {
- if (!failover->hw->draw_elements( failover->hw,
- indexBuffer,
- indexSize,
- prim,
- start,
- count )) {
-
- failover->hw->flush( failover->hw, ~0, NULL );
- failover->mode = FO_SW;
- }
+ failover->hw->draw_elements( failover->hw,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count );
}
/* Possibly try software:
*/
if (failover->mode == FO_SW) {
- if (failover->dirty)
+ if (failover->dirty) {
+ failover->hw->flush( failover->hw, ~0, NULL );
failover_state_emit( failover );
+ }
failover->sw->draw_elements( failover->sw,
indexBuffer,
@@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
*/
failover->sw->flush( failover->sw, ~0, NULL );
}
-
- return TRUE;
}
-static boolean failover_draw_arrays( struct pipe_context *pipe,
+static void failover_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return failover_draw_elements(pipe, NULL, 0, prim, start, count);
+ failover_draw_elements(pipe, NULL, 0, prim, start, count);
}
static unsigned int
diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index 9ba86ba8664..149393712a3 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -72,6 +72,7 @@ struct failover_context {
*/
const struct fo_state *blend;
const struct fo_state *sampler[PIPE_MAX_SAMPLERS];
+ const struct fo_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
const struct fo_state *depth_stencil;
const struct fo_state *rasterizer;
const struct fo_state *fragment_shader;
@@ -83,6 +84,7 @@ struct failover_context {
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
@@ -92,11 +94,15 @@ struct failover_context {
void *sw_sampler_state[PIPE_MAX_SAMPLERS];
void *hw_sampler_state[PIPE_MAX_SAMPLERS];
+ void *sw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS];
+ void *hw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS];
unsigned dirty;
unsigned num_samplers;
+ unsigned num_vertex_samplers;
unsigned num_textures;
+ unsigned num_vertex_textures;
unsigned mode;
struct pipe_context *hw;
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index c8eb9262994..3f5f5560323 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -322,8 +322,9 @@ failover_create_sampler_state(struct pipe_context *pipe,
}
static void
-failover_bind_sampler_states(struct pipe_context *pipe,
- unsigned num, void **sampler)
+failover_bind_fragment_sampler_states(struct pipe_context *pipe,
+ unsigned num,
+ void **sampler)
{
struct failover_context *failover = failover_context(pipe);
struct fo_state *state = (struct fo_state*)sampler;
@@ -339,10 +340,40 @@ failover_bind_sampler_states(struct pipe_context *pipe,
}
failover->dirty |= FO_NEW_SAMPLER;
failover->num_samplers = num;
- failover->sw->bind_sampler_states(failover->sw, num,
- failover->sw_sampler_state);
- failover->hw->bind_sampler_states(failover->hw, num,
- failover->hw_sampler_state);
+ failover->sw->bind_fragment_sampler_states(failover->sw, num,
+ failover->sw_sampler_state);
+ failover->hw->bind_fragment_sampler_states(failover->hw, num,
+ failover->hw_sampler_state);
+}
+
+static void
+failover_bind_vertex_sampler_states(struct pipe_context *pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct failover_context *failover = failover_context(pipe);
+ struct fo_state *state = (struct fo_state*)samplers;
+ uint i;
+
+ assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_samplers == failover->num_vertex_samplers &&
+ !memcmp(failover->vertex_samplers, samplers, num_samplers * sizeof(void *))) {
+ return;
+ }
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ failover->sw_vertex_sampler_state[i] = i < num_samplers ? state[i].sw_state : NULL;
+ failover->hw_vertex_sampler_state[i] = i < num_samplers ? state[i].hw_state : NULL;
+ }
+ failover->dirty |= FO_NEW_SAMPLER;
+ failover->num_vertex_samplers = num_samplers;
+ failover->sw->bind_vertex_sampler_states(failover->sw,
+ num_samplers,
+ failover->sw_vertex_sampler_state);
+ failover->hw->bind_vertex_sampler_states(failover->hw,
+ num_samplers,
+ failover->hw_vertex_sampler_state);
}
static void
@@ -360,9 +391,9 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
static void
-failover_set_sampler_textures(struct pipe_context *pipe,
- unsigned num,
- struct pipe_texture **texture)
+failover_set_fragment_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
{
struct failover_context *failover = failover_context(pipe);
uint i;
@@ -381,8 +412,38 @@ failover_set_sampler_textures(struct pipe_context *pipe,
NULL);
failover->dirty |= FO_NEW_TEXTURE;
failover->num_textures = num;
- failover->sw->set_sampler_textures( failover->sw, num, texture );
- failover->hw->set_sampler_textures( failover->hw, num, texture );
+ failover->sw->set_fragment_sampler_textures( failover->sw, num, texture );
+ failover->hw->set_fragment_sampler_textures( failover->hw, num, texture );
+}
+
+
+static void
+failover_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct failover_context *failover = failover_context(pipe);
+ uint i;
+
+ assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_textures == failover->num_vertex_textures &&
+ !memcmp(failover->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+ return;
+ }
+ for (i = 0; i < num_textures; i++) {
+ pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i],
+ textures[i]);
+ }
+ for (i = num_textures; i < failover->num_vertex_textures; i++) {
+ pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i],
+ NULL);
+ }
+ failover->dirty |= FO_NEW_TEXTURE;
+ failover->num_vertex_textures = num_textures;
+ failover->sw->set_vertex_sampler_textures(failover->sw, num_textures, textures);
+ failover->hw->set_vertex_sampler_textures(failover->hw, num_textures, textures);
}
@@ -453,7 +514,8 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.bind_blend_state = failover_bind_blend_state;
failover->pipe.delete_blend_state = failover_delete_blend_state;
failover->pipe.create_sampler_state = failover_create_sampler_state;
- failover->pipe.bind_sampler_states = failover_bind_sampler_states;
+ failover->pipe.bind_fragment_sampler_states = failover_bind_fragment_sampler_states;
+ failover->pipe.bind_vertex_sampler_states = failover_bind_vertex_sampler_states;
failover->pipe.delete_sampler_state = failover_delete_sampler_state;
failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state;
failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state;
@@ -473,7 +535,8 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_framebuffer_state = failover_set_framebuffer_state;
failover->pipe.set_polygon_stipple = failover_set_polygon_stipple;
failover->pipe.set_scissor_state = failover_set_scissor_state;
- failover->pipe.set_sampler_textures = failover_set_sampler_textures;
+ failover->pipe.set_fragment_sampler_textures = failover_set_fragment_sampler_textures;
+ failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
failover->pipe.set_viewport_state = failover_set_viewport_state;
failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
failover->pipe.set_vertex_elements = failover_set_vertex_elements;
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
index bd4fce9d209..a3341e33f80 100644
--- a/src/gallium/drivers/failover/fo_state_emit.c
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -92,13 +92,19 @@ failover_state_emit( struct failover_context *failover )
failover->sw->set_viewport_state( failover->sw, &failover->viewport );
if (failover->dirty & FO_NEW_SAMPLER) {
- failover->sw->bind_sampler_states( failover->sw, failover->num_samplers,
- failover->sw_sampler_state );
+ failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers,
+ failover->sw_sampler_state );
+ failover->sw->bind_vertex_sampler_states(failover->sw,
+ failover->num_vertex_samplers,
+ failover->sw_vertex_sampler_state);
}
if (failover->dirty & FO_NEW_TEXTURE) {
- failover->sw->set_sampler_textures( failover->sw, failover->num_textures,
- failover->texture );
+ failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures,
+ failover->texture );
+ failover->sw->set_vertex_sampler_textures(failover->sw,
+ failover->num_vertex_textures,
+ failover->vertex_textures);
}
if (failover->dirty & FO_NEW_VERTEX_BUFFER) {
diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h
index a8ce997a1f6..533122b69da 100644
--- a/src/gallium/drivers/failover/fo_winsys.h
+++ b/src/gallium/drivers/failover/fo_winsys.h
@@ -36,10 +36,13 @@
struct pipe_context;
+struct failover_context;
struct pipe_context *failover_create( struct pipe_context *hw,
struct pipe_context *sw );
+void failover_fail_over( struct failover_context *failover );
+
#endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c
index effeba12972..669964770d4 100644
--- a/src/gallium/drivers/i915/i915_buffer.c
+++ b/src/gallium/drivers/i915/i915_buffer.c
@@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen,
{
struct i915_buffer *buf = i915_buffer(buffer);
assert(!buf->ibuf);
+ (void) buf;
}
static void
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index e745f3342d1..89feeade756 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -45,7 +45,7 @@
*/
-static boolean
+static void
i915_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
i915->current.constants[PIPE_SHADER_VERTEX],
(i915->current.num_user_constants[PIPE_SHADER_VERTEX] *
4 * sizeof(float)));
@@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe,
pipe_buffer_unmap(pipe->screen, indexBuffer);
draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
}
-
- return TRUE;
}
-static boolean
+static void
i915_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_range_elements(pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- prim, start, count);
+ i915_draw_range_elements(pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ prim, start, count);
}
-static boolean
+static void
i915_draw_arrays(struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return i915_draw_elements(pipe, NULL, 0, prim, start, count);
+ i915_draw_elements(pipe, NULL, 0, prim, start, count);
}
@@ -155,15 +153,11 @@ static unsigned int
i915_is_buffer_referenced(struct pipe_context *pipe,
struct pipe_buffer *buf)
{
- /**
- * FIXME: Return the corrent result. We can't alays return referenced
- * since it causes a double flush within the vbo module.
+ /*
+ * Since we never expose hardware buffers to the state tracker
+ * they can never be referenced, so this isn't a lie
*/
-#if 0
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
return 0;
-#endif
}
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index ce92d1af9a7..c6e6d6fd313 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream )
default:
return debug(stream, "", 0);
}
+ break;
default:
assert(0);
return 0;
@@ -880,7 +881,7 @@ i915_dump_batchbuffer( struct intel_batchbuffer *batch )
return;
}
- debug_printf( "\n\nBATCH: (%d)\n", bytes / 4);
+ debug_printf( "\n\nBATCH: (%d)\n", (int)bytes / 4);
while (!done &&
stream.offset < bytes)
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 89504ced276..25c53210be8 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -127,7 +127,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
va_start( args, msg );
util_vsnprintf( buffer, sizeof(buffer), msg, args );
va_end( args );
- debug_printf(buffer);
+ debug_printf("%s", buffer);
debug_printf("\n");
p->error = 1;
@@ -143,12 +143,12 @@ static uint
src_vector(struct i915_fp_compile *p,
const struct tgsi_full_src_register *source)
{
- uint index = source->SrcRegister.Index;
+ uint index = source->Register.Index;
uint src = 0, sem_name, sem_ind;
- switch (source->SrcRegister.File) {
+ switch (source->Register.File) {
case TGSI_FILE_TEMPORARY:
- if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+ if (source->Register.Index >= I915_MAX_TEMPORARY) {
i915_program_error(p, "Exceeded max temporary reg");
return 0;
}
@@ -214,45 +214,26 @@ src_vector(struct i915_fp_compile *p,
return 0;
}
- if (source->SrcRegister.Extended) {
- src = swizzle(src,
- source->SrcRegisterExtSwz.ExtSwizzleX,
- source->SrcRegisterExtSwz.ExtSwizzleY,
- source->SrcRegisterExtSwz.ExtSwizzleZ,
- source->SrcRegisterExtSwz.ExtSwizzleW);
- }
- else {
- src = swizzle(src,
- source->SrcRegister.SwizzleX,
- source->SrcRegister.SwizzleY,
- source->SrcRegister.SwizzleZ,
- source->SrcRegister.SwizzleW);
- }
+ src = swizzle(src,
+ source->Register.SwizzleX,
+ source->Register.SwizzleY,
+ source->Register.SwizzleZ,
+ source->Register.SwizzleW);
/* There's both negate-all-components and per-component negation.
* Try to handle both here.
*/
{
- int nx = source->SrcRegisterExtSwz.NegateX;
- int ny = source->SrcRegisterExtSwz.NegateY;
- int nz = source->SrcRegisterExtSwz.NegateZ;
- int nw = source->SrcRegisterExtSwz.NegateW;
- if (source->SrcRegister.Negate) {
- nx = !nx;
- ny = !ny;
- nz = !nz;
- nw = !nw;
- }
- src = negate(src, nx, ny, nz, nw);
+ int n = source->Register.Negate;
+ src = negate(src, n, n, n, n);
}
- /* no abs() or post-abs negation */
+ /* no abs() */
#if 0
/* XXX assertions disabled to allow arbfplight.c to run */
/* XXX enable these assertions, or fix things */
- assert(!source->SrcRegisterExtMod.Absolute);
- assert(!source->SrcRegisterExtMod.Negate);
+ assert(!source->Register.Absolute);
#endif
return src;
}
@@ -265,10 +246,10 @@ static uint
get_result_vector(struct i915_fp_compile *p,
const struct tgsi_full_dst_register *dest)
{
- switch (dest->DstRegister.File) {
+ switch (dest->Register.File) {
case TGSI_FILE_OUTPUT:
{
- uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index];
+ uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
switch (sem_name) {
case TGSI_SEMANTIC_POSITION:
return UREG(REG_TYPE_OD, 0);
@@ -280,7 +261,7 @@ get_result_vector(struct i915_fp_compile *p,
}
}
case TGSI_FILE_TEMPORARY:
- return UREG(REG_TYPE_R, dest->DstRegister.Index);
+ return UREG(REG_TYPE_R, dest->Register.Index);
default:
i915_program_error(p, "Bad inst->DstReg.File");
return 0;
@@ -295,7 +276,7 @@ static uint
get_result_flags(const struct tgsi_full_instruction *inst)
{
const uint writeMask
- = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ = inst->Dst[0].Register.WriteMask;
uint flags = 0x0;
if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
@@ -357,14 +338,14 @@ emit_tex(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
uint opcode)
{
- uint texture = inst->InstructionExtTexture.Texture;
- uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ uint texture = inst->Texture.Texture;
+ uint unit = inst->Src[1].Register.Index;
uint tex = translate_tex_src_target( p, texture );
uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
- uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
+ uint coord = src_vector( p, &inst->Src[0]);
i915_emit_texld( p,
- get_result_vector( p, &inst->FullDstRegisters[0] ),
+ get_result_vector( p, &inst->Dst[0] ),
get_result_flags( inst ),
sampler,
coord,
@@ -386,13 +367,13 @@ emit_simple_arith(struct i915_fp_compile *p,
assert(numArgs <= 3);
- arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
- arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
- arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+ arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] );
+ arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] );
+ arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] );
i915_emit_arith( p,
opcode,
- get_result_vector( p, &inst->FullDstRegisters[0]),
+ get_result_vector( p, &inst->Dst[0]),
get_result_flags( inst ), 0,
arg1,
arg2,
@@ -412,8 +393,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
/* transpose first two registers */
inst2 = *inst;
- inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
- inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+ inst2.Src[0] = inst->Src[1];
+ inst2.Src[1] = inst->Src[0];
emit_simple_arith(p, &inst2, opcode, numArgs);
}
@@ -442,10 +423,10 @@ i915_translate_instruction(struct i915_fp_compile *p,
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
i915_emit_arith(p,
A0_MAX,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
src0, negate(src0, 1, 1, 1, 1), 0);
break;
@@ -455,17 +436,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_CMP:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
- src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
+ src2 = src_vector(p, &inst->Src[2]);
i915_emit_arith(p, A0_CMP,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst),
0, src0, src2, src1); /* NOTE: order of src2, src1 */
break;
case TGSI_OPCODE_COS:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -508,7 +489,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_DP4,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(tmp, ONE, Z, Y, X),
i915_emit_const4fv(p, cos_constants), 0);
@@ -523,19 +504,19 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_DPH:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
i915_emit_arith(p,
A0_DP4,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, X, Y, Z, ONE), src1, 0);
break;
case TGSI_OPCODE_DST:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
/* result[0] = 1 * 1;
* result[1] = a[1] * b[1];
@@ -544,7 +525,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
*/
i915_emit_arith(p,
A0_MUL,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, ONE, Y, Z, ONE),
swizzle(src1, ONE, Y, ONE, W), 0);
@@ -555,11 +536,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_EX2:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
i915_emit_arith(p,
A0_EXP,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
@@ -574,7 +555,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
case TGSI_OPCODE_KIL:
/* kill if src[0].x < 0 || src[0].y < 0 ... */
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
tmp = i915_get_utemp(p);
i915_emit_texld(p,
@@ -590,17 +571,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LG2:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
i915_emit_arith(p,
A0_LOG,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case TGSI_OPCODE_LIT:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
tmp = i915_get_utemp(p);
/* tmp = max( a.xyzw, a.00zw )
@@ -624,7 +605,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
swizzle(tmp, Y, Y, Y, Y), 0, 0);
i915_emit_arith(p, A0_CMP,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
swizzle(tmp, ONE, X, ZERO, ONE),
@@ -633,9 +614,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LRP:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
- src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
+ src2 = src_vector(p, &inst->Src[2]);
flags = get_result_flags(inst);
tmp = i915_get_utemp(p);
@@ -650,7 +631,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
i915_emit_arith(p, A0_MAD,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
break;
@@ -663,8 +644,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_MIN:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@@ -676,12 +657,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_MOV,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
emit_simple_arith(p, inst, A0_MOV, 1);
break;
@@ -690,8 +670,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_POW:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@@ -706,7 +686,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_EXP,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
break;
@@ -715,27 +695,27 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_RCP:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
i915_emit_arith(p,
A0_RCP,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case TGSI_OPCODE_RSQ:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
i915_emit_arith(p,
A0_RSQ,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case TGSI_OPCODE_SCS:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
tmp = i915_get_utemp(p);
/*
@@ -758,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
swizzle(tmp, X, Y, X, Y),
swizzle(tmp, X, X, ONE, ONE), 0);
- writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ writemask = inst->Dst[0].Register.WriteMask;
if (writemask & TGSI_WRITEMASK_Y) {
uint tmp1;
@@ -776,7 +756,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_DP4,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
A0_DEST_CHANNEL_Y, 0,
swizzle(tmp1, W, Z, Y, X),
i915_emit_const4fv(p, sin_constants), 0);
@@ -791,7 +771,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_DP4,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
A0_DEST_CHANNEL_X, 0,
swizzle(tmp, ONE, Z, Y, X),
i915_emit_const4fv(p, cos_constants), 0);
@@ -808,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SIN:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+ src0 = src_vector(p, &inst->Src[0]);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -851,7 +831,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_DP4,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(tmp, W, Z, Y, X),
i915_emit_const4fv(p, sin_constants), 0);
@@ -867,12 +847,12 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SUB:
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
i915_emit_arith(p,
A0_ADD,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
src0, negate(src1, 1, 1, 1, 1), 0);
break;
@@ -896,8 +876,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
* result.z = src0.x * src1.y - src0.y * src1.x;
* result.w = undef;
*/
- src0 = src_vector(p, &inst->FullSrcRegisters[0]);
- src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+ src0 = src_vector(p, &inst->Src[0]);
+ src1 = src_vector(p, &inst->Src[1]);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@@ -908,7 +888,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
i915_emit_arith(p,
A0_MAD,
- get_result_vector(p, &inst->FullDstRegisters[0]),
+ get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst), 0,
swizzle(src0, Y, Z, X, ONE),
swizzle(src1, Z, X, Y, ONE),
@@ -948,8 +928,8 @@ i915_translate_instructions(struct i915_fp_compile *p,
if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_CONSTANT) {
uint i;
- for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
- i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+ for (i = parse.FullToken.FullDeclaration.Range.First;
+ i <= parse.FullToken.FullDeclaration.Range.Last;
i++) {
assert(ifs->constant_flags[i] == 0x0);
ifs->constant_flags[i] = I915_CONSTFLAG_USER;
@@ -959,8 +939,8 @@ i915_translate_instructions(struct i915_fp_compile *p,
else if (parse.FullToken.FullDeclaration.Declaration.File
== TGSI_FILE_TEMPORARY) {
uint i;
- for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
- i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+ for (i = parse.FullToken.FullDeclaration.Range.First;
+ i <= parse.FullToken.FullDeclaration.Range.Last;
i++) {
assert(i < I915_MAX_TEMPORARY);
/* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index c66558c320e..d4ee8f5339b 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -271,6 +271,7 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id)
default:
debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
__FUNCTION__, pci_id);
+ FREE(is);
return NULL;
}
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 7d48e6e84d5..5f5b6f8e185 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap)
return TEXCOORDMODE_CLAMP_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
-// case PIPE_TEX_WRAP_MIRRORED_REPEAT:
-// return TEXCOORDMODE_MIRROR;
+ /*
+ case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+ return TEXCOORDMODE_MIRROR;
+ */
default:
return TEXCOORDMODE_WRAP;
}
@@ -72,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter )
return FILTER_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return FILTER_LINEAR;
- case PIPE_TEX_FILTER_ANISO:
- return FILTER_ANISOTROPIC;
default:
assert(0);
return FILTER_NEAREST;
@@ -219,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe,
minFilt = translate_img_filter( sampler->min_img_filter );
magFilt = translate_img_filter( sampler->mag_img_filter );
+ if (sampler->max_anisotropy > 1.0)
+ minFilt = magFilt = FILTER_ANISOTROPIC;
+
if (sampler->max_anisotropy > 2.0) {
cso->state[0] |= SS2_MAX_ANISO_4;
}
@@ -750,22 +753,15 @@ static void i915_set_vertex_elements(struct pipe_context *pipe,
}
-static void i915_set_edgeflags(struct pipe_context *pipe,
- const unsigned *bitfield)
-{
- /* TODO do something here */
-}
-
void
i915_init_state_functions( struct i915_context *i915 )
{
- i915->base.set_edgeflags = i915_set_edgeflags;
i915->base.create_blend_state = i915_create_blend_state;
i915->base.bind_blend_state = i915_bind_blend_state;
i915->base.delete_blend_state = i915_delete_blend_state;
i915->base.create_sampler_state = i915_create_sampler_state;
- i915->base.bind_sampler_states = i915_bind_sampler_states;
+ i915->base.bind_fragment_sampler_states = i915_bind_sampler_states;
i915->base.delete_sampler_state = i915_delete_sampler_state;
i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state;
@@ -789,7 +785,7 @@ i915_init_state_functions( struct i915_context *i915 )
i915->base.set_polygon_stipple = i915_set_polygon_stipple;
i915->base.set_scissor_state = i915_set_scissor_state;
- i915->base.set_sampler_textures = i915_set_sampler_textures;
+ i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
i915->base.set_viewport_state = i915_set_viewport_state;
i915->base.set_vertex_buffers = i915_set_vertex_buffers;
i915->base.set_vertex_elements = i915_set_vertex_elements;
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 178d4e8781d..03dd5091a61 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* pos */
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
@@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 )
/* primary color */
if (colors[0]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_COLOR;
}
/* secondary color */
if (colors[1]) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
}
/* fog coord, not fog blend factor */
if (fog) {
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
}
@@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
uint hwtc;
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
- src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
else {
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index c5e9084d12e..cbac4175c8f 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915,
{
const struct pipe_texture *pt = &tex->base;
uint format, pitch;
- const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ const uint width = pt->width0, height = pt->height0, depth = pt->depth0;
const uint num_levels = pt->last_level;
unsigned max_lod = num_levels * 4;
unsigned tiled = MS3_USE_FENCE_REGS;
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index ab8331f3e64..c693eb30e87 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -32,6 +32,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_inlines.h"
#include "pipe/internal/p_winsys_screen.h"
+#include "util/u_format.h"
#include "util/u_tile.h"
#include "util/u_rect.h"
@@ -48,17 +49,19 @@ i915_surface_copy(struct pipe_context *pipe,
{
struct i915_texture *dst_tex = (struct i915_texture *)dst->texture;
struct i915_texture *src_tex = (struct i915_texture *)src->texture;
+ struct pipe_texture *dpt = &dst_tex->base;
+ struct pipe_texture *spt = &src_tex->base;
assert( dst != src );
- assert( dst_tex->base.block.size == src_tex->base.block.size );
- assert( dst_tex->base.block.width == src_tex->base.block.height );
- assert( dst_tex->base.block.height == src_tex->base.block.height );
- assert( dst_tex->base.block.width == 1 );
- assert( dst_tex->base.block.height == 1 );
+ assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) );
+ assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) );
+ assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) );
+ assert( util_format_get_blockwidth(dpt->format) == 1 );
+ assert( util_format_get_blockheight(dpt->format) == 1 );
i915_copy_blit( i915_context(pipe),
FALSE,
- dst_tex->base.block.size,
+ util_format_get_blocksize(dpt->format),
(unsigned short) src_tex->stride, src_tex->buffer, src->offset,
(unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset,
(short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
@@ -72,12 +75,13 @@ i915_surface_fill(struct pipe_context *pipe,
unsigned width, unsigned height, unsigned value)
{
struct i915_texture *tex = (struct i915_texture *)dst->texture;
+ struct pipe_texture *pt = &tex->base;
- assert(tex->base.block.width == 1);
- assert(tex->base.block.height == 1);
+ assert(util_format_get_blockwidth(pt->format) == 1);
+ assert(util_format_get_blockheight(pt->format) == 1);
i915_fill_blit( i915_context(pipe),
- tex->base.block.size,
+ util_format_get_blocksize(pt->format),
(unsigned short) tex->stride,
tex->buffer, dst->offset,
(short) dstx, (short) dsty,
diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
index 286c9ace8e5..50a9e19094b 100644
--- a/src/gallium/drivers/i915/i915_texture.c
+++ b/src/gallium/drivers/i915/i915_texture.c
@@ -35,6 +35,7 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "pipe/internal/p_winsys_screen.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -74,6 +75,9 @@ static const int step_offsets[6][2] = {
{-1, 1}
};
+/* XXX really need twice the size if x is already pot?
+ Otherwise just use util_next_power_of_two?
+*/
static unsigned
power_of_two(unsigned x)
{
@@ -83,13 +87,6 @@ power_of_two(unsigned x)
return value;
}
-static unsigned
-round_up(unsigned n, unsigned multiple)
-{
- return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
/*
* More advanced helper funcs
*/
@@ -101,17 +98,8 @@ i915_miptree_set_level_info(struct i915_texture *tex,
unsigned nr_images,
unsigned w, unsigned h, unsigned d)
{
- struct pipe_texture *pt = &tex->base;
-
assert(level < PIPE_MAX_TEXTURE_LEVELS);
- pt->width[level] = w;
- pt->height[level] = h;
- pt->depth[level] = d;
-
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
-
tex->nr_images[level] = nr_images;
/*
@@ -142,7 +130,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex,
assert(img < tex->nr_images[level]);
- tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size;
+ tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->base.format);
/*
printf("%s level %d img %d pos %d,%d image_offset %x\n",
@@ -164,28 +152,28 @@ i915_scanout_layout(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
- if (pt->last_level > 0 || pt->block.size != 4)
+ if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4)
return FALSE;
i915_miptree_set_level_info(tex, 0, 1,
- tex->base.width[0],
- tex->base.height[0],
+ pt->width0,
+ pt->height0,
1);
i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
- if (tex->base.width[0] >= 240) {
- tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
- tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+ if (pt->width0 >= 240) {
+ tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+ tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
tex->hw_tiled = INTEL_TILE_X;
- } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) {
- tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
- tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+ } else if (pt->width0 == 64 && pt->height0 == 64) {
+ tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+ tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
} else {
return FALSE;
}
debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
- tex->base.width[0], tex->base.height[0], pt->block.size,
+ pt->width0, pt->height0, util_format_get_blocksize(pt->format),
tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
return TRUE;
@@ -199,25 +187,25 @@ i915_display_target_layout(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
- if (pt->last_level > 0 || pt->block.size != 4)
+ if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4)
return FALSE;
/* fallback to normal textures for small textures */
- if (tex->base.width[0] < 240)
+ if (pt->width0 < 240)
return FALSE;
i915_miptree_set_level_info(tex, 0, 1,
- tex->base.width[0],
- tex->base.height[0],
+ pt->width0,
+ pt->height0,
1);
i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
- tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
- tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+ tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+ tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
tex->hw_tiled = INTEL_TILE_X;
debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
- tex->base.width[0], tex->base.height[0], pt->block.size,
+ pt->width0, pt->height0, util_format_get_blocksize(pt->format),
tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
return TRUE;
@@ -228,36 +216,34 @@ i915_miptree_layout_2d(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned nblocksx = pt->nblocksx[0];
- unsigned nblocksy = pt->nblocksy[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0);
/* used for scanouts that need special layouts */
- if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
if (i915_scanout_layout(tex))
return;
/* for shared buffers we use some very like scanout */
- if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
if (i915_display_target_layout(tex))
return;
- tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
tex->total_nblocksy = 0;
for (level = 0; level <= pt->last_level; level++) {
i915_miptree_set_level_info(tex, level, 1, width, height, 1);
i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy);
- nblocksy = round_up(MAX2(2, nblocksy), 2);
+ nblocksy = align(MAX2(2, nblocksy), 2);
tex->total_nblocksy += nblocksy;
- width = minify(width);
- height = minify(height);
- nblocksx = pf_get_nblocksx(&pt->block, width);
- nblocksy = pf_get_nblocksy(&pt->block, height);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ nblocksy = util_format_get_nblocksy(pt->format, height);
}
}
@@ -267,16 +253,15 @@ i915_miptree_layout_3d(struct i915_texture *tex)
struct pipe_texture *pt = &tex->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
- unsigned nblocksx = pt->nblocksx[0];
- unsigned nblocksy = pt->nblocksy[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
+ unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0);
unsigned stack_nblocksy = 0;
/* Calculate the size of a single slice.
*/
- tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
/* XXX: hardware expects/requires 9 levels at minimum.
*/
@@ -285,44 +270,41 @@ i915_miptree_layout_3d(struct i915_texture *tex)
stack_nblocksy += MAX2(2, nblocksy);
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
- nblocksx = pf_get_nblocksx(&pt->block, width);
- nblocksy = pf_get_nblocksy(&pt->block, height);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ nblocksy = util_format_get_nblocksy(pt->format, height);
}
/* Fixup depth image_offsets:
*/
- depth = pt->depth[0];
for (level = 0; level <= pt->last_level; level++) {
unsigned i;
for (i = 0; i < depth; i++)
i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy);
- depth = minify(depth);
+ depth = u_minify(depth, 1);
}
/* Multiply slice size by texture depth for total size. It's
* remarkable how wasteful of memory the i915 texture layouts
* are. They are largely fixed in the i945.
*/
- tex->total_nblocksy = stack_nblocksy * pt->depth[0];
+ tex->total_nblocksy = stack_nblocksy * pt->depth0;
}
static void
i915_miptree_layout_cube(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
- unsigned width = pt->width[0], height = pt->height[0];
- const unsigned nblocks = pt->nblocksx[0];
+ unsigned width = pt->width0, height = pt->height0;
+ const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0);
unsigned level;
unsigned face;
assert(width == height); /* cubemap images are square */
/* double pitch for cube layouts */
- tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+ tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4);
tex->total_nblocksy = nblocks * 4;
for (level = 0; level <= pt->last_level; level++) {
@@ -383,10 +365,10 @@ i945_miptree_layout_2d(struct i915_texture *tex)
unsigned level;
unsigned x = 0;
unsigned y = 0;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned nblocksx = pt->nblocksx[0];
- unsigned nblocksy = pt->nblocksy[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned nblocksx = util_format_get_nblocksx(pt->format, pt->width0);
+ unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0);
/* used for scanouts that need special layouts */
if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
@@ -398,7 +380,7 @@ i945_miptree_layout_2d(struct i915_texture *tex)
if (i915_display_target_layout(tex))
return;
- tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
/* May need to adjust pitch to accomodate the placement of
* the 2nd mipmap level. This occurs when the alignment
@@ -407,11 +389,11 @@ i945_miptree_layout_2d(struct i915_texture *tex)
*/
if (pt->last_level > 0) {
unsigned mip1_nblocksx
- = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
- + pf_get_nblocksx(&pt->block, minify(minify(width)));
+ = align(util_format_get_nblocksx(pt->format, u_minify(width, 1)), align_x)
+ + util_format_get_nblocksx(pt->format, u_minify(width, 2));
if (mip1_nblocksx > nblocksx)
- tex->stride = mip1_nblocksx * pt->block.size;
+ tex->stride = mip1_nblocksx * util_format_get_blocksize(pt->format);
}
/* Pitch must be a whole number of dwords
@@ -439,10 +421,10 @@ i945_miptree_layout_2d(struct i915_texture *tex)
y += nblocksy;
}
- width = minify(width);
- height = minify(height);
- nblocksx = pf_get_nblocksx(&pt->block, width);
- nblocksy = pf_get_nblocksy(&pt->block, height);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ nblocksx = util_format_get_nblocksx(pt->format, width);
+ nblocksy = util_format_get_nblocksy(pt->format, height);
}
}
@@ -450,20 +432,19 @@ static void
i945_miptree_layout_3d(struct i915_texture *tex)
{
struct pipe_texture *pt = &tex->base;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
- unsigned nblocksx = pt->nblocksx[0];
- unsigned nblocksy = pt->nblocksy[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
+ unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0);
unsigned pack_x_pitch, pack_x_nr;
unsigned pack_y_pitch;
unsigned level;
- tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+ tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
tex->total_nblocksy = 0;
- pack_y_pitch = MAX2(pt->nblocksy[0], 2);
- pack_x_pitch = tex->stride / pt->block.size;
+ pack_y_pitch = MAX2(nblocksy, 2);
+ pack_x_pitch = tex->stride / util_format_get_blocksize(pt->format);
pack_x_nr = 1;
for (level = 0; level <= pt->last_level; level++) {
@@ -488,18 +469,17 @@ i945_miptree_layout_3d(struct i915_texture *tex)
if (pack_x_pitch > 4) {
pack_x_pitch >>= 1;
pack_x_nr <<= 1;
- assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride);
+ assert(pack_x_pitch * pack_x_nr * util_format_get_blocksize(pt->format) <= tex->stride);
}
if (pack_y_pitch > 2) {
pack_y_pitch >>= 1;
}
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
- nblocksx = pf_get_nblocksx(&pt->block, width);
- nblocksy = pf_get_nblocksy(&pt->block, height);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+ nblocksy = util_format_get_nblocksy(pt->format, height);
}
}
@@ -509,13 +489,13 @@ i945_miptree_layout_cube(struct i915_texture *tex)
struct pipe_texture *pt = &tex->base;
unsigned level;
- const unsigned nblocks = pt->nblocksx[0];
+ const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0);
unsigned face;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
/*
- printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]);
+ printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0);
*/
assert(width == height); /* cubemap images are square */
@@ -529,9 +509,9 @@ i945_miptree_layout_cube(struct i915_texture *tex)
* or the final row of 4x4, 2x2 and 1x1 faces below this.
*/
if (nblocks > 32)
- tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+ tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4);
else
- tex->stride = 14 * 8 * pt->block.size;
+ tex->stride = 14 * 8 * util_format_get_blocksize(pt->format);
tex->total_nblocksy = nblocks * 4;
@@ -651,9 +631,6 @@ i915_texture_create(struct pipe_screen *screen,
pipe_reference_init(&tex->base.reference, 1);
tex->base.screen = screen;
- tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
- tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
-
if (is->is_i945) {
if (!i945_miptree_layout(tex))
goto fail;
@@ -667,7 +644,7 @@ i915_texture_create(struct pipe_screen *screen,
/* for scanouts and cursors, cursors arn't scanouts */
- if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64)
+ if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64)
buf_usage = INTEL_NEW_SCANOUT;
else
buf_usage = INTEL_NEW_TEXTURE;
@@ -710,7 +687,7 @@ i915_texture_blanket(struct pipe_screen * screen,
/* Only supports one type */
if (base->target != PIPE_TEXTURE_2D ||
base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1) {
return NULL;
}
@@ -724,7 +701,7 @@ i915_texture_blanket(struct pipe_screen * screen,
tex->stride = stride[0];
- i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+ i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
pipe_buffer_reference(&tex->buffer, buffer);
@@ -788,8 +765,8 @@ i915_get_tex_surface(struct pipe_screen *screen,
pipe_reference_init(&ps->reference, 1);
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->offset = offset;
ps->usage = flags;
}
@@ -835,14 +812,10 @@ i915_get_tex_transfer(struct pipe_screen *screen,
trans = CALLOC_STRUCT(i915_transfer);
if (trans) {
pipe_texture_reference(&trans->base.texture, texture);
- trans->base.format = trans->base.format;
trans->base.x = x;
trans->base.y = y;
trans->base.width = w;
trans->base.height = h;
- trans->base.block = texture->block;
- trans->base.nblocksx = texture->nblocksx[level];
- trans->base.nblocksy = texture->nblocksy[level];
trans->base.stride = tex->stride;
trans->offset = offset;
trans->base.usage = usage;
@@ -858,6 +831,7 @@ i915_transfer_map(struct pipe_screen *screen,
struct intel_winsys *iws = i915_screen(tex->base.screen)->iws;
char *map;
boolean write = FALSE;
+ enum pipe_format format = tex->base.format;
if (transfer->usage & PIPE_TRANSFER_WRITE)
write = TRUE;
@@ -867,8 +841,8 @@ i915_transfer_map(struct pipe_screen *screen,
return NULL;
return map + i915_transfer(transfer)->offset +
- transfer->y / transfer->block.height * transfer->stride +
- transfer->x / transfer->block.width * transfer->block.size;
+ transfer->y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
static void
@@ -919,7 +893,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
/* Only supports one type */
if (base->target != PIPE_TEXTURE_2D ||
base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1) {
return NULL;
}
@@ -933,7 +907,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
tex->stride = stride;
- i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+ i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
tex->buffer = buffer;
diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h
index 2c8dc63f3f9..c6bf6e6f7f1 100644
--- a/src/gallium/drivers/i915/intel_winsys.h
+++ b/src/gallium/drivers/i915/intel_winsys.h
@@ -42,21 +42,21 @@ enum intel_buffer_usage
INTEL_USAGE_2D_TARGET = 0x04,
INTEL_USAGE_2D_SOURCE = 0x08,
/* use on vertex */
- INTEL_USAGE_VERTEX = 0x10,
+ INTEL_USAGE_VERTEX = 0x10
};
enum intel_buffer_type
{
INTEL_NEW_TEXTURE,
INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */
- INTEL_NEW_VERTEX,
+ INTEL_NEW_VERTEX
};
enum intel_buffer_tile
{
INTEL_TILE_NONE,
INTEL_TILE_X,
- INTEL_TILE_Y,
+ INTEL_TILE_Y
};
struct intel_batchbuffer {
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
new file mode 100644
index 00000000000..95fd3cd69bd
--- /dev/null
+++ b/src/gallium/drivers/i965/Makefile
@@ -0,0 +1,74 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965
+
+C_SOURCES = \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_unfilled.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_disasm.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_pipe_blend.c \
+ brw_pipe_depth.c \
+ brw_pipe_fb.c \
+ brw_pipe_query.c \
+ brw_pipe_shader.c \
+ brw_pipe_flush.c \
+ brw_pipe_misc.c \
+ brw_pipe_sampler.c \
+ brw_pipe_vertex.c \
+ brw_pipe_clear.c \
+ brw_pipe_rast.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state_batch.c \
+ brw_state_debug.c \
+ brw_state_cache.c \
+ brw_state_upload.c \
+ brw_structs_dump.c \
+ brw_swtnl.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_vs_surface_state.c \
+ brw_wm.c \
+ brw_wm_debug.c \
+ brw_wm_emit.c \
+ brw_wm_fp.c \
+ brw_wm_iz.c \
+ brw_wm_pass0.c \
+ brw_wm_pass1.c \
+ brw_wm_pass2.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c \
+ brw_screen.c \
+ brw_screen_buffers.c \
+ brw_screen_tex_layout.c \
+ brw_screen_texture.c \
+ brw_screen_surface.c \
+ brw_batchbuffer.c \
+ brw_winsys_debug.c \
+ intel_decode.c
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
new file mode 100644
index 00000000000..9c2faaf4b49
--- /dev/null
+++ b/src/gallium/drivers/i965/SConscript
@@ -0,0 +1,77 @@
+Import('*')
+
+env = env.Clone()
+
+i965 = env.ConvenienceLibrary(
+ target = 'i965',
+ source = [
+ 'brw_batchbuffer.c',
+ 'brw_cc.c',
+ 'brw_clip.c',
+ 'brw_clip_line.c',
+ 'brw_clip_point.c',
+ 'brw_clip_state.c',
+ 'brw_clip_tri.c',
+ 'brw_clip_unfilled.c',
+ 'brw_clip_util.c',
+ 'brw_context.c',
+ 'brw_curbe.c',
+ 'brw_disasm.c',
+ 'brw_draw.c',
+ 'brw_draw_upload.c',
+ 'brw_eu.c',
+ 'brw_eu_debug.c',
+ 'brw_eu_emit.c',
+ 'brw_eu_util.c',
+ 'brw_gs.c',
+ 'brw_gs_emit.c',
+ 'brw_gs_state.c',
+ 'brw_misc_state.c',
+ 'brw_pipe_blend.c',
+ 'brw_pipe_clear.c',
+ 'brw_pipe_depth.c',
+ 'brw_pipe_fb.c',
+ 'brw_pipe_flush.c',
+ 'brw_pipe_misc.c',
+ 'brw_pipe_query.c',
+ 'brw_pipe_rast.c',
+ 'brw_pipe_sampler.c',
+ 'brw_pipe_shader.c',
+ 'brw_pipe_vertex.c',
+ 'brw_screen_buffers.c',
+ 'brw_screen.c',
+ 'brw_screen_surface.c',
+ 'brw_screen_tex_layout.c',
+ 'brw_screen_texture.c',
+ 'brw_structs_dump.c',
+ 'brw_sf.c',
+ 'brw_sf_emit.c',
+ 'brw_sf_state.c',
+ 'brw_state_batch.c',
+ 'brw_state_cache.c',
+# 'brw_state_debug.c',
+ 'brw_state_upload.c',
+ 'brw_swtnl.c',
+ 'brw_urb.c',
+ 'brw_util.c',
+ 'brw_vs.c',
+ 'brw_vs_emit.c',
+ 'brw_vs_state.c',
+ 'brw_vs_surface_state.c',
+ 'brw_wm.c',
+# 'brw_wm_constant_buffer.c',
+ 'brw_wm_debug.c',
+ 'brw_wm_emit.c',
+ 'brw_wm_fp.c',
+# 'brw_wm_glsl.c',
+ 'brw_wm_iz.c',
+ 'brw_wm_pass0.c',
+ 'brw_wm_pass1.c',
+ 'brw_wm_pass2.c',
+ 'brw_wm_sampler_state.c',
+ 'brw_wm_state.c',
+ 'brw_wm_surface_state.c',
+ 'intel_decode.c',
+ ])
+
+Export('i965')
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
new file mode 100644
index 00000000000..22607dc6083
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -0,0 +1,202 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+#include "brw_structs.h"
+
+#define ALWAYS_EMIT_MI_FLUSH 1
+
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch)
+{
+ enum pipe_error ret;
+
+ ret = batch->sws->bo_alloc( batch->sws,
+ BRW_BUFFER_TYPE_BATCH,
+ BRW_BATCH_SIZE, 4096,
+ &batch->buf );
+ if (ret)
+ return ret;
+
+ batch->size = BRW_BATCH_SIZE;
+
+ /* With map_range semantics, the winsys can decide whether to
+ * inject a malloc'ed bounce buffer instead of mapping directly.
+ */
+ batch->map = batch->sws->bo_map(batch->buf,
+ BRW_DATA_BATCH_BUFFER,
+ 0, batch->size,
+ GL_TRUE,
+ GL_TRUE,
+ GL_TRUE);
+
+ batch->ptr = batch->map;
+ return PIPE_OK;
+}
+
+struct brw_batchbuffer *
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
+ struct brw_chipset chipset)
+{
+ struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
+
+ batch->sws = sws;
+ batch->chipset = chipset;
+ brw_batchbuffer_reset(batch);
+
+ return batch;
+}
+
+void
+brw_batchbuffer_free(struct brw_batchbuffer *batch)
+{
+ if (batch->map) {
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ }
+
+ bo_reference(&batch->buf, NULL);
+ FREE(batch);
+}
+
+
+void
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file,
+ int line)
+{
+ GLuint used = batch->ptr - batch->map;
+
+ if (used == 0)
+ return;
+
+ /* Post-swap throttling done by the state tracker.
+ */
+
+ if (BRW_DEBUG & DEBUG_BATCH)
+ debug_printf("%s:%d: Batchbuffer flush with %db used\n",
+ file, line, used);
+
+ if (ALWAYS_EMIT_MI_FLUSH) {
+ *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Round batchbuffer usage to 2 DWORDs.
+ */
+ if ((used & 4) == 0) {
+ *(GLuint *) (batch->ptr) = 0; /* noop */
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+ }
+
+ /* Mark the end of the buffer.
+ */
+ *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
+ batch->ptr += 4;
+ used = batch->ptr - batch->map;
+
+ batch->sws->bo_flush_range(batch->buf, 0, used);
+ batch->sws->bo_unmap(batch->buf);
+ batch->map = NULL;
+ batch->ptr = NULL;
+
+ batch->sws->bo_exec(batch->buf, used );
+
+ if (BRW_DEBUG & DEBUG_SYNC) {
+ /* Abuse map/unmap to achieve wait-for-fence.
+ *
+ * XXX: hide this inside the winsys and export a fence
+ * interface.
+ */
+ debug_printf("waiting for idle\n");
+ batch->sws->bo_wait_idle(batch->buf);
+ }
+
+ /* Reset the buffer:
+ */
+ brw_batchbuffer_reset(batch);
+}
+
+
+/* The OUT_RELOC() macro ends up here, generating a relocation within
+ * the batch buffer.
+ */
+enum pipe_error
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+ struct brw_winsys_buffer *buffer,
+ uint32_t usage,
+ uint32_t delta)
+{
+ int ret;
+
+ if (batch->ptr - batch->map > batch->buf->size) {
+ debug_printf("bad relocation ptr %p map %p offset %d size %d\n",
+ batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ ret = batch->sws->bo_emit_reloc(batch->buf,
+ usage,
+ delta,
+ batch->ptr - batch->map,
+ buffer);
+ if (ret != 0)
+ return ret;
+
+ /* bo_emit_reloc was resposible for writing a zero into the
+ * batchbuffer if necessary. Just need to update our pointer.
+ */
+ batch->ptr += 4;
+
+ return 0;
+}
+
+enum pipe_error
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+ const void *data, GLuint bytes,
+ enum cliprect_mode cliprect_mode)
+{
+ enum pipe_error ret;
+
+ assert((bytes & 3) == 0);
+
+ ret = brw_batchbuffer_require_space(batch, bytes);
+ if (ret)
+ return ret;
+
+ memcpy(batch->ptr, data, bytes);
+ batch->ptr += bytes;
+ return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
new file mode 100644
index 00000000000..7473f5bea4d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -0,0 +1,148 @@
+#ifndef BRW_BATCHBUFFER_H
+#define BRW_BATCHBUFFER_H
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+/* All ignored:
+ */
+enum cliprect_mode {
+ IGNORE_CLIPRECTS,
+ LOOP_CLIPRECTS,
+ NO_LOOP_CLIPRECTS,
+ REFERENCES_CLIPRECTS
+};
+
+
+
+
+struct brw_batchbuffer {
+
+ struct brw_winsys_screen *sws;
+ struct brw_winsys_buffer *buf;
+ struct brw_chipset chipset;
+
+ /**
+ * Values exported to speed up the writing the batchbuffer,
+ * instead of having to go trough a accesor function for
+ * each dword written.
+ */
+ /*{@*/
+ uint8_t *map;
+ uint8_t *ptr;
+ size_t size;
+ struct {
+ uint8_t *end_ptr;
+ } emit;
+
+
+ size_t relocs;
+ size_t max_relocs;
+ /*@}*/
+};
+
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
+ struct brw_chipset chipset );
+
+void brw_batchbuffer_free(struct brw_batchbuffer *batch);
+
+void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+ const char *file, int line);
+
+
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multple
+ * intel_buffer_dword() calls.
+ */
+int brw_batchbuffer_data(struct brw_batchbuffer *batch,
+ const void *data, GLuint bytes,
+ enum cliprect_mode cliprect_mode);
+
+
+int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+ struct brw_winsys_buffer *buffer,
+ enum brw_buffer_usage usage,
+ uint32_t offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined. Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLint
+brw_batchbuffer_space(struct brw_batchbuffer *batch)
+{
+ return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
+{
+ assert(batch->map);
+ assert(brw_batchbuffer_space(batch) >= 4);
+ *(GLuint *) (batch->ptr) = dword;
+ batch->ptr += 4;
+}
+
+static INLINE enum pipe_error
+brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
+ GLuint sz)
+{
+ assert(sz < batch->size - 8);
+ if (brw_batchbuffer_space(batch) < sz) {
+ assert(0);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+#ifdef DEBUG
+ batch->emit.end_ptr = batch->ptr + sz;
+#endif
+ return 0;
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BEGIN_BATCH(n, cliprect_mode) do { \
+ brw_batchbuffer_require_space(brw->batch, (n)*4); \
+ } while (0)
+
+#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d)
+
+#define OUT_RELOC(buf, usage, delta) do { \
+ assert((unsigned) (delta) < buf->size); \
+ brw_batchbuffer_emit_reloc(brw->batch, buf, \
+ usage, delta); \
+ } while (0)
+
+#ifdef DEBUG
+#define ADVANCE_BATCH() do { \
+ unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \
+ if (_n != 0) { \
+ debug_printf("%s: %d too many bytes emitted to batch\n", \
+ __FUNCTION__, _n); \
+ abort(); \
+ } \
+ brw->batch->emit.end_ptr = NULL; \
+ } while(0)
+#else
+#define ADVANCE_BATCH()
+#endif
+
+static INLINE void
+brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch)
+{
+ brw_batchbuffer_require_space(batch, 4);
+ brw_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
new file mode 100644
index 00000000000..3e070f5591a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+static enum pipe_error prepare_cc_vp( struct brw_context *brw )
+{
+ return brw_cache_data( &brw->cache,
+ BRW_CC_VP,
+ &brw->curr.ccv,
+ NULL, 0,
+ &brw->cc.reloc[CC_RELOC_VP].bo );
+}
+
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .mesa = PIPE_NEW_VIEWPORT,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .prepare = prepare_cc_vp
+};
+
+
+/* A long-winded way to OR two unsigned integers together:
+ */
+static INLINE struct brw_cc3
+combine_cc3( struct brw_cc3 a, struct brw_cc3 b )
+{
+ union { struct brw_cc3 cc3; unsigned i; } ca, cb;
+ ca.cc3 = a;
+ cb.cc3 = b;
+ ca.i |= cb.i;
+ return ca.cc3;
+}
+
+
+static int prepare_cc_unit( struct brw_context *brw )
+{
+ brw->cc.cc.cc0 = brw->curr.zstencil->cc0;
+ brw->cc.cc.cc1 = brw->curr.zstencil->cc1;
+ brw->cc.cc.cc2 = brw->curr.zstencil->cc2;
+ brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
+
+ brw->cc.cc.cc5 = brw->curr.blend->cc5;
+ brw->cc.cc.cc6 = brw->curr.blend->cc6;
+ brw->cc.cc.cc7 = brw->curr.zstencil->cc7;
+
+ return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT,
+ &brw->cc.cc, sizeof(brw->cc.cc),
+ brw->cc.reloc, 1,
+ &brw->cc.state_bo);
+}
+
+const struct brw_tracked_state brw_cc_unit = {
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND,
+ .brw = 0,
+ .cache = CACHE_NEW_CC_VP
+ },
+ .prepare = prepare_cc_unit,
+};
+
+
+void brw_hw_cc_init( struct brw_context *brw )
+{
+ make_reloc(&brw->cc.reloc[0],
+ BRW_USAGE_STATE,
+ 0,
+ offsetof(struct brw_cc_unit_state, cc4),
+ NULL);
+}
+
+
+void brw_hw_cc_cleanup( struct brw_context *brw )
+{
+ bo_reference(&brw->cc.state_bo, NULL);
+ bo_reference(&brw->cc.reloc[0].bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
new file mode 100644
index 00000000000..d67a1a62633
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
+
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+static enum pipe_error
+compile_clip_prog( struct brw_context *brw,
+ struct brw_clip_prog_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ enum pipe_error ret;
+ struct brw_clip_compile c;
+ const GLuint *program;
+ GLuint program_size;
+ GLuint delta;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ c.chipset = brw->chipset;
+ c.key = *key;
+ c.need_ff_sync = c.chipset.is_igdng;
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.header_position_offset = ATTR_SIZE;
+
+ if (c.chipset.is_igdng)
+ delta = 3 * REG_SIZE;
+ else
+ delta = REG_SIZE;
+
+ c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
+
+ if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
+
+ if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
+
+ if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
+
+ if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
+
+ if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
+ c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+ c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Would ideally have the option of producing a program which could
+ * do all three:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_TRIANGLES:
+ if (key->do_unfilled)
+ brw_emit_unfilled_clip( &c );
+ else
+ brw_emit_tri_clip( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ brw_emit_line_clip( &c );
+ break;
+ case PIPE_PRIM_POINTS:
+ brw_emit_point_clip( &c );
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ /* Upload
+ */
+ ret = brw_upload_cache( &brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->clip.prog_data,
+ bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static enum pipe_error
+upload_clip_prog(struct brw_context *brw)
+{
+ const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+ struct brw_clip_prog_key key;
+ enum pipe_error ret;
+
+ /* Populate the key, starting from the almost-complete version from
+ * the rast state.
+ */
+
+ /* PIPE_NEW_RAST */
+ key = brw->curr.rast->clip_key;
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ key.primitive = brw->reduced_primitive;
+
+ /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove
+ * dependency on CACHE_NEW_VS_PROG
+ */
+ /* CACHE_NEW_VS_PROG */
+ key.nr_attrs = brw->vs.prog_data->nr_outputs;
+
+ /* PIPE_NEW_VS */
+ key.output_hpos = vs->output_hpos;
+ key.output_color0 = vs->output_color0;
+ key.output_color1 = vs->output_color1;
+ key.output_bfc0 = vs->output_bfc0;
+ key.output_bfc1 = vs->output_bfc1;
+ key.output_edgeflag = vs->output_edgeflag;
+
+ /* PIPE_NEW_CLIP */
+ key.nr_userclip = brw->curr.ucp.nr;
+
+ /* Already cached?
+ */
+ if (brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->clip.prog_data,
+ &brw->clip.prog_bo))
+ return PIPE_OK;
+
+ /* Compile new program:
+ */
+ ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_clip_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 00000000000..80e3a11a370
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,199 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+#include "pipe/p_state.h"
+#include "brw_reg.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ */
+struct brw_clip_prog_key {
+ GLuint nr_attrs:6;
+ GLuint primitive:4;
+ GLuint nr_userclip:3;
+ GLuint do_flat_shading:1;
+ GLuint do_unfilled:1;
+ GLuint fill_cw:2; /* includes cull information */
+ GLuint fill_ccw:2; /* includes cull information */
+ GLuint offset_cw:1;
+ GLuint offset_ccw:1;
+ GLuint copy_bfc_cw:1;
+ GLuint copy_bfc_ccw:1;
+ GLuint clip_mode:3;
+ GLuint output_hpos:6; /* not always zero? */
+
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+ GLuint pad1:2;
+
+ GLfloat offset_factor;
+ GLfloat offset_units;
+};
+
+struct brw_clip_prog_data {
+ GLuint curb_read_length; /* user planes? */
+ GLuint clip_mode;
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile {
+ struct brw_compile func;
+ struct brw_clip_prog_key key;
+ struct brw_clip_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
+ } reg;
+
+ /* 3 different ways of expressing vertex size, including
+ * key.nr_attrs.
+ */
+ GLuint nr_regs;
+ GLuint nr_bytes;
+
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ GLboolean need_direction;
+ struct brw_chipset chipset;
+
+ GLuint last_mrf;
+
+ GLuint header_position_offset;
+ GLboolean need_ff_sync;
+
+ GLuint nr_color_attrs;
+ GLuint offset_color0;
+ GLuint offset_color1;
+ GLuint offset_bfc0;
+ GLuint offset_bfc1;
+
+ GLuint offset_hpos;
+ GLuint offset_edgeflag;
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+ struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 00000000000..54282d975ed
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,271 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < 4; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.t0 = brw_vec1_grf(i, 1);
+ c->reg.t1 = brw_vec1_grf(i, 2);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp1 = brw_vec1_grf(i, 4);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ * GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * GLfloat t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * GLfloat t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2 = NULL;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ }
+
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation);
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /*
+ * Both can be negative on GM965/G965 due to RHW workaround
+ * if so, this object should be rejected.
+ */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ /* Only on GM965/G965 */
+ if (c->chipset.is_965) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ }
+
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ if (c->chipset.is_965) {
+ brw_ENDIF(p, is_neg2);
+ }
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+ brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 00000000000..e0a5330556d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_init_ff_sync(c);
+
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 00000000000..5c3ccfd8d0d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,209 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_clip.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+struct brw_clip_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int clip_mode;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+
+ GLboolean depth_clamp;
+};
+
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_CLIP_PROG */
+ key->total_grf = brw->clip.prog_data->total_grf;
+ key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+ key->clip_mode = brw->clip.prog_data->clip_mode;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.clip_start;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_clip_entries;
+ key->urb_size = brw->urb.vsize;
+
+ /* */
+ key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */
+}
+
+static enum pipe_error
+clip_unit_create_from_key(struct brw_context *brw,
+ struct brw_clip_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_clip_unit_state clip;
+ enum pipe_error ret;
+
+ memset(&clip, 0, sizeof(clip));
+
+ clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ /* reloc */
+ clip.thread0.kernel_start_pointer = 0;
+
+ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip.thread1.single_program_flow = 1;
+
+ clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+ clip.thread3.dispatch_grf_start_reg = 1;
+ clip.thread3.urb_entry_read_offset = 0;
+
+ clip.thread4.nr_urb_entries = key->nr_urb_entries;
+ clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* If we have enough clip URB entries to run two threads, do so.
+ */
+ if (key->nr_urb_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(key->nr_urb_entries % 2 == 0);
+
+ /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+ * only 2 threads can output VUEs at a time.
+ */
+ if (BRW_IS_IGDNG(brw))
+ clip.thread4.max_threads = 16 - 1;
+ else
+ clip.thread4.max_threads = 2 - 1;
+ } else {
+ assert(key->nr_urb_entries >= 5);
+ clip.thread4.max_threads = 1 - 1;
+ }
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ clip.thread4.max_threads = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ clip.thread4.stats_enable = 1;
+
+ clip.clip5.userclip_enable_flags = 0x7f;
+ clip.clip5.userclip_must_clip = 1;
+ clip.clip5.guard_band_enable = 0;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1;
+ clip.clip5.viewport_xy_clip_enable = 1;
+ clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip.clip5.api_mode = BRW_CLIP_API_OGL;
+ clip.clip5.clip_mode = key->clip_mode;
+
+ if (BRW_IS_G4X(brw))
+ clip.clip5.negative_w_clip_test = 1;
+
+ clip.clip6.clipper_viewport_state_ptr = 0;
+ clip.viewport_xmin = -1;
+ clip.viewport_xmax = 1;
+ clip.viewport_ymin = -1;
+ clip.viewport_ymax = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+ key, sizeof(*key),
+ reloc, 1,
+ &clip, sizeof(clip),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static int upload_clip_unit( struct brw_context *brw )
+{
+ struct brw_clip_unit_key key;
+ struct brw_winsys_reloc reloc[1];
+ unsigned grf_reg_count;
+ enum pipe_error ret;
+
+ clip_unit_populate_key(brw, &key);
+
+ grf_reg_count = align(key.total_grf, 16) / 16 - 1;
+
+ /* clip program relocation
+ *
+ * XXX: these reloc structs are long lived and only need to be
+ * updated when the bound BO changes. Hopefully the stuff mixed in
+ * in the delta's is non-orthogonal.
+ */
+ assert(brw->clip.prog_bo);
+ make_reloc(&reloc[0],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_bo);
+
+
+ if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+ &key, sizeof(key),
+ reloc, 1,
+ NULL,
+ &brw->clip.state_bo))
+ return PIPE_OK;
+
+ /* Create new:
+ */
+ ret = clip_unit_create_from_key(brw, &key,
+ reloc,
+ &brw->clip.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_clip_unit = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_CLIP_PROG
+ },
+ .prepare = upload_clip_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 00000000000..4cde7294ea0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,595 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ if (c->key.nr_attrs & 1) {
+ for (j = 0; j < 3; j++) {
+ GLuint delta = c->key.nr_attrs*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = c->key.nr_attrs * 16 + 32 * 3;
+
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) {
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1));
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+}
+
+
+
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_outside;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos));
+ brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
+ brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
+
+ /* test nearz, xmin, ymin plane */
+ /* clip.xyz < -clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ /* clip.xyz > clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c);
+}
+
+
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ if (c->chipset.is_965) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+ }
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+ do_clip_tri(c);
+ else
+ maybe_do_clip_tri(c);
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 00000000000..aec835b8cec
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,497 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* This is performed against the original triangles, so no indirection
+ * required:
+BZZZT!
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos);
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos);
+
+
+ struct brw_reg v0n = get_tmp(c);
+ struct brw_reg v1n = get_tmp(c);
+ struct brw_reg v2n = get_tmp(c);
+
+ /* Convert to NDC.
+ * NOTE: We can't modify the original vertex coordinates,
+ * as it may impact further operations.
+ * So, we have to keep normalized coordinates in temp registers.
+ *
+ * TBD-KC
+ * Try to optimize unnecessary MOV's.
+ */
+ brw_MOV(p, v0n, v0);
+ brw_MOV(p, v1n, v1);
+ brw_MOV(p, v2n, v2);
+
+ brw_clip_project_position(c, v0n);
+ brw_clip_project_position(c, v1n);
+ brw_clip_project_position(c, v2n);
+
+ /* Calculate the vectors of two edges of the triangle:
+ */
+ brw_ADD(p, e, v0n, negate(v2n));
+ brw_ADD(p, f, v1n, negate(v2n));
+
+ /* Take their crossproduct:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+
+static void cull_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ assert (!(c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL));
+
+ if (c->key.fill_ccw == CLIP_CULL)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+static void copy_bfc( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ /* Do we have any colors to copy?
+ */
+ if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) &&
+ (c->offset_color1 == 0 || c->offset_bfc1 == 0))
+ return;
+
+ /* In some wierd degnerate cases we can end up testing the
+ * direction twice, once for culling and once for bfc copying. Oh
+ * well, that's what you get for setting wierd GL state.
+ */
+ if (c->key.copy_bfc_ccw)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ GLuint i;
+
+ for (i = 0; i < 3; i++) {
+ if (c->offset_color0 && c->offset_bfc0)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset_color0),
+ byte_offset(c->reg.vertex[i], c->offset_bfc0));
+
+ if (c->offset_color1 && c->offset_bfc1)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset_color0),
+ byte_offset(c->reg.vertex[i], c->offset_bfc0));
+ }
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+
+/*
+ GLfloat iz = 1.0 / dir.z;
+ GLfloat ac = dir.x * iz;
+ GLfloat bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+ brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+static void apply_one_offset( struct brw_clip_compile *c,
+ struct brw_indirect vert )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg z = deref_1f(vert, c->header_position_offset +
+ 2 * type_sz(BRW_REGISTER_TYPE_F));
+
+ brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ GLboolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a seperate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ apply_one_offset(c, v0);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+static void emit_points(struct brw_clip_compile *c,
+ GLboolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset_edgeflag),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (do_offset)
+ apply_one_offset(c, v0);
+
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+ GLuint mode,
+ GLboolean do_offset )
+{
+ switch (mode) {
+ case CLIP_FILL:
+ brw_clip_tri_emit_polygon(c);
+ break;
+
+ case CLIP_LINE:
+ emit_lines(c, do_offset);
+ break;
+
+ case CLIP_POINT:
+ emit_points(c, do_offset);
+ break;
+
+ case CLIP_CULL:
+ assert(0);
+ break;
+ }
+}
+
+
+
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+
+ /* Direction culling has already been done.
+ */
+ if (c->key.fill_ccw != c->key.fill_cw &&
+ c->key.fill_ccw != CLIP_CULL &&
+ c->key.fill_cw != CLIP_CULL)
+ {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+ ccw = brw_ELSE(p, ccw);
+ {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ brw_ENDIF(p, ccw);
+ }
+ else if (c->key.fill_cw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ else if (c->key.fill_ccw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+}
+
+
+
+
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
+
+ assert(c->offset_edgeflag);
+
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) {
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c);
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
new file mode 100644
index 00000000000..97a57103105
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -0,0 +1,388 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+ return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
+}
+
+
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+
+ if (!c->key.nr_userclip) {
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
+ }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+ struct brw_compile *p = &c->func;
+
+ /* calc rhw
+ */
+ brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+ /* value.xyz *= value.rhw
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+ struct brw_indirect vert_addr )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* Fixup position. Extract from the original vertex and re-project
+ * to screen space:
+ */
+ brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos));
+ brw_clip_project_position(c, tmp);
+ brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+
+ release_tmp(c, tmp);
+}
+
+
+
+
+/* Interpolate between two vertices and put the result into a0.0.
+ * Increment a0.0 accordingly.
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+ GLuint i;
+
+ /* Just copy the vertex header:
+ */
+ /*
+ * After CLIP stage, only first 256 bits of the VUE are read
+ * back on IGDNG, so needn't change it
+ */
+ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+ /* Iterate over each attribute (could be done in pairs?)
+ */
+ for (i = 0; i < c->key.nr_attrs; i++) {
+ GLuint delta = i*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = i * 16 + 32 * 3;
+
+ if (delta == c->offset_edgeflag) {
+ if (force_edgeflag)
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+ else
+ brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+ }
+ else {
+ /* Interpolate:
+ *
+ * New = attr0 + t*attr1 - t*attr0
+ */
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t0);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t0);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+ }
+ }
+
+ if (i & 1) {
+ GLuint delta = i*16 + 32;
+
+ if (c->chipset.is_igdng)
+ delta = i * 16 + 32 * 3;
+
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+ }
+
+ release_tmp(c, tmp);
+
+ /* Recreate the projected (NDC) coordinate in the new vertex
+ * header:
+ */
+ brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLuint start = c->last_mrf;
+
+ brw_clip_ff_sync(c);
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a seperate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a seperate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_clip_ff_sync(c);
+ /* Send an empty message to kill the thread and release any
+ * allocated urb entry:
+ */
+ brw_urb_WRITE(p,
+ retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ 0, /* allocate */
+ 0, /* used */
+ 1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0,
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+ return brw_address(c->reg.fixed_planes);
+}
+
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+ if (c->key.nr_userclip) {
+ return brw_imm_uw(16);
+ }
+ else {
+ return brw_imm_uw(4);
+ }
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from )
+{
+ struct brw_compile *p = &c->func;
+
+ if (c->offset_color0)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset_color0),
+ byte_offset(c->reg.vertex[from], c->offset_color0));
+
+ if (c->offset_color1)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset_color1),
+ byte_offset(c->reg.vertex[from], c->offset_color1));
+
+ if (c->offset_bfc0)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset_bfc0),
+ byte_offset(c->reg.vertex[from], c->offset_bfc0));
+
+ if (c->offset_bfc1)
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset_bfc1),
+ byte_offset(c->reg.vertex[from], c->offset_bfc1));
+}
+
+
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+
+ /* Shift so that lowest outcode bit is rightmost:
+ */
+ brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+ if (c->key.nr_userclip) {
+ struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+ /* Rearrange userclip outcodes so that they come directly after
+ * the fixed plane bits.
+ */
+ brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+ brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+
+ release_tmp(c, tmp);
+ }
+}
+
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *need_ff_sync;
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+ need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+ }
+ brw_ENDIF(p, need_ff_sync);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
new file mode 100644
index 00000000000..e67551882dc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_winsys.h"
+#include "brw_screen.h"
+
+
+static void brw_destroy_context( struct pipe_context *pipe )
+{
+ struct brw_context *brw = brw_context(pipe);
+ int i;
+
+ brw_context_flush( brw );
+ brw_batchbuffer_free( brw->batch );
+ brw_destroy_state(brw);
+
+ brw_draw_cleanup( brw );
+
+ brw_pipe_blend_cleanup( brw );
+ brw_pipe_depth_stencil_cleanup( brw );
+ brw_pipe_framebuffer_cleanup( brw );
+ brw_pipe_flush_cleanup( brw );
+ brw_pipe_misc_cleanup( brw );
+ brw_pipe_query_cleanup( brw );
+ brw_pipe_rast_cleanup( brw );
+ brw_pipe_sampler_cleanup( brw );
+ brw_pipe_shader_cleanup( brw );
+ brw_pipe_vertex_cleanup( brw );
+ brw_pipe_clear_cleanup( brw );
+
+ brw_hw_cc_cleanup( brw );
+
+
+ FREE(brw->wm.compile_data);
+
+ for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+ pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+ brw->curr.fb.nr_cbufs = 0;
+ pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
+
+ bo_reference(&brw->curbe.curbe_bo, NULL);
+ bo_reference(&brw->vs.prog_bo, NULL);
+ bo_reference(&brw->vs.state_bo, NULL);
+ bo_reference(&brw->vs.bind_bo, NULL);
+ bo_reference(&brw->gs.prog_bo, NULL);
+ bo_reference(&brw->gs.state_bo, NULL);
+ bo_reference(&brw->clip.prog_bo, NULL);
+ bo_reference(&brw->clip.state_bo, NULL);
+ bo_reference(&brw->clip.vp_bo, NULL);
+ bo_reference(&brw->sf.prog_bo, NULL);
+ bo_reference(&brw->sf.state_bo, NULL);
+ bo_reference(&brw->sf.vp_bo, NULL);
+
+ for (i = 0; i < Elements(brw->wm.sdc_bo); i++)
+ bo_reference(&brw->wm.sdc_bo[i], NULL);
+
+ bo_reference(&brw->wm.bind_bo, NULL);
+
+ for (i = 0; i < Elements(brw->wm.surf_bo); i++)
+ bo_reference(&brw->wm.surf_bo[i], NULL);
+
+ bo_reference(&brw->wm.sampler_bo, NULL);
+ bo_reference(&brw->wm.prog_bo, NULL);
+ bo_reference(&brw->wm.state_bo, NULL);
+}
+
+
+struct pipe_context *brw_create_context(struct pipe_screen *screen)
+{
+ struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+
+ if (!brw) {
+ debug_printf("%s: failed to alloc context\n", __FUNCTION__);
+ return NULL;
+ }
+
+ brw->base.screen = screen;
+ brw->base.destroy = brw_destroy_context;
+ brw->sws = brw_screen(screen)->sws;
+ brw->chipset = brw_screen(screen)->chipset;
+
+ brw_pipe_blend_init( brw );
+ brw_pipe_depth_stencil_init( brw );
+ brw_pipe_framebuffer_init( brw );
+ brw_pipe_flush_init( brw );
+ brw_pipe_misc_init( brw );
+ brw_pipe_query_init( brw );
+ brw_pipe_rast_init( brw );
+ brw_pipe_sampler_init( brw );
+ brw_pipe_shader_init( brw );
+ brw_pipe_vertex_init( brw );
+ brw_pipe_clear_init( brw );
+
+ brw_hw_cc_init( brw );
+
+ brw_init_state( brw );
+ brw_draw_init( brw );
+
+ brw->state.dirty.mesa = ~0;
+ brw->state.dirty.brw = ~0;
+
+ brw->flags.always_emit_state = 0;
+
+ make_empty_list(&brw->query.active_head);
+
+ brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
+ if (brw->batch == NULL)
+ goto fail;
+
+ return &brw->base;
+
+fail:
+ if (brw->batch)
+ brw_batchbuffer_free( brw->batch );
+ return NULL;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 00000000000..8c006bb95b2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,858 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "brw_structs.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contiguous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitary
+ * computation and emit whatever primtives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by latter units, including Quads and Lineloops.
+ *
+ * CS - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_MAX_CURBE (32*16)
+
+
+/* Need a value to say a particular vertex shader output isn't
+ * present. Limits us to 63 outputs currently.
+ */
+#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1)
+
+
+struct brw_context;
+
+struct brw_depth_stencil_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc0 cc0;
+ struct brw_cc1 cc1;
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc7 cc7;
+
+ unsigned iz_lookup;
+};
+
+
+struct brw_blend_state {
+ /* Precalculated hardware state:
+ */
+ struct brw_cc2 cc2;
+ struct brw_cc3 cc3;
+ struct brw_cc5 cc5;
+ struct brw_cc6 cc6;
+
+ struct brw_surf_ss0 ss0;
+};
+
+
+struct brw_rasterizer_state;
+
+struct brw_immediate_data {
+ unsigned nr;
+ float (*data)[4];
+};
+
+struct brw_vertex_shader {
+ const struct tgsi_token *tokens;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+
+ struct tgsi_shader_info info;
+ struct brw_immediate_data immediates;
+
+ GLuint has_flow_control:1;
+ GLuint use_const_buffer:1;
+
+ /* Offsets of special vertex shader outputs required for clipping.
+ */
+ GLuint output_hpos:6; /* not always zero? */
+ GLuint output_color0:6;
+ GLuint output_color1:6;
+ GLuint output_bfc0:6;
+ GLuint output_bfc1:6;
+ GLuint output_edgeflag:6;
+
+ unsigned id;
+};
+
+struct brw_fs_signature {
+ GLuint nr_inputs;
+ struct {
+ GLuint interp:3; /* TGSI_INTERPOLATE_x */
+ GLuint semantic:5; /* TGSI_SEMANTIC_x */
+ GLuint semantic_index:24;
+ } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+ ((s)->nr_inputs * sizeof (s)->input[0]))
+
+
+struct brw_fragment_shader {
+ const struct tgsi_token *tokens;
+ struct tgsi_shader_info info;
+
+ struct brw_fs_signature signature;
+ struct brw_immediate_data immediates;
+
+ unsigned iz_lookup;
+ /*unsigned wm_lookup;*/
+
+ unsigned uses_depth:1;
+ unsigned has_flow_control:1;
+
+ unsigned id;
+ struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+};
+
+
+struct brw_sampler {
+ struct brw_ss0 ss0;
+ struct brw_ss1 ss1;
+ float border_color[4];
+ struct brw_ss3 ss3;
+};
+
+
+
+#define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1
+#define PIPE_NEW_RAST 0x2
+#define PIPE_NEW_BLEND 0x4
+#define PIPE_NEW_VIEWPORT 0x8
+#define PIPE_NEW_SAMPLERS 0x10
+#define PIPE_NEW_VERTEX_BUFFER 0x20
+#define PIPE_NEW_VERTEX_ELEMENT 0x40
+#define PIPE_NEW_FRAGMENT_SHADER 0x80
+#define PIPE_NEW_VERTEX_SHADER 0x100
+#define PIPE_NEW_FRAGMENT_CONSTANTS 0x200
+#define PIPE_NEW_VERTEX_CONSTANTS 0x400
+#define PIPE_NEW_CLIP 0x800
+#define PIPE_NEW_INDEX_BUFFER 0x1000
+#define PIPE_NEW_INDEX_RANGE 0x2000
+#define PIPE_NEW_BLEND_COLOR 0x4000
+#define PIPE_NEW_POLYGON_STIPPLE 0x8000
+#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
+#define PIPE_NEW_DEPTH_BUFFER 0x20000
+#define PIPE_NEW_COLOR_BUFFERS 0x40000
+#define PIPE_NEW_QUERY 0x80000
+#define PIPE_NEW_SCISSOR 0x100000
+#define PIPE_NEW_BOUND_TEXTURES 0x200000
+#define PIPE_NEW_NR_CBUFS 0x400000
+#define PIPE_NEW_FRAGMENT_SIGNATURE 0x800000
+
+
+
+#define BRW_NEW_URB_FENCE 0x1
+#define BRW_NEW_FRAGMENT_PROGRAM 0x2
+#define BRW_NEW_VERTEX_PROGRAM 0x4
+#define BRW_NEW_INPUT_DIMENSIONS 0x8
+#define BRW_NEW_CURBE_OFFSETS 0x10
+#define BRW_NEW_REDUCED_PRIMITIVE 0x20
+#define BRW_NEW_PRIMITIVE 0x40
+#define BRW_NEW_CONTEXT 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
+#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
+#define BRW_NEW_xxx 0x2000 /* was FENCE */
+#define BRW_NEW_INDICES 0x4000
+
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering. Need to re-emit these fresh in each
+ * batchbuffer as the referenced buffers may be relocated in the
+ * meantime.
+ */
+#define BRW_NEW_BATCH 0x10000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
+
+struct brw_state_flags {
+ /** State update flags signalled by mesa internals */
+ GLuint mesa;
+ /**
+ * State update flags signalled as the result of brw_tracked_state updates
+ */
+ GLuint brw;
+ /** State update flags signalled by brw_state_cache.c searches */
+ GLuint cache;
+};
+
+
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+
+ GLuint first_curbe_grf;
+ GLuint total_grf;
+ GLuint total_scratch;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLboolean error;
+
+ /* Pointer to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Each vertex may have upto 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ GLuint urb_entry_size;
+};
+
+
+struct brw_clip_prog_data;
+
+struct brw_gs_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ GLuint nr_outputs;
+ GLuint nr_inputs;
+
+ GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */
+
+ GLboolean writes_psiz;
+
+ /* Used for calculating urb partitions:
+ */
+ GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+ GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces and current texture
+ * objects and shader constant buffers (+2).
+ */
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define BTI_COLOR_BUF(d) (d)
+#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS)
+#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+/* Bit of a hack to align these with the winsys buffer_data_type enum.
+ */
+enum brw_cache_id {
+ BRW_CC_VP = BRW_DATA_GS_CC_VP,
+ BRW_CC_UNIT = BRW_DATA_GS_CC_UNIT,
+ BRW_WM_PROG = BRW_DATA_GS_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER = BRW_DATA_GS_SAMPLER,
+ BRW_WM_UNIT = BRW_DATA_GS_WM_UNIT,
+ BRW_SF_PROG = BRW_DATA_GS_SF_PROG,
+ BRW_SF_VP = BRW_DATA_GS_SF_VP,
+ BRW_SF_UNIT = BRW_DATA_GS_SF_UNIT,
+ BRW_VS_UNIT = BRW_DATA_GS_VS_UNIT,
+ BRW_VS_PROG = BRW_DATA_GS_VS_PROG,
+ BRW_GS_UNIT = BRW_DATA_GS_GS_UNIT,
+ BRW_GS_PROG = BRW_DATA_GS_GS_PROG,
+ BRW_CLIP_VP = BRW_DATA_GS_CLIP_VP,
+ BRW_CLIP_UNIT = BRW_DATA_GS_CLIP_UNIT,
+ BRW_CLIP_PROG = BRW_DATA_GS_CLIP_PROG,
+ BRW_SS_SURFACE = BRW_DATA_SS_SURFACE,
+ BRW_SS_SURF_BIND = BRW_DATA_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+struct brw_cache_item {
+ /**
+ * Effectively part of the key, cache_id identifies what kind of state
+ * buffer is involved, and also which brw->state.dirty.cache flag should
+ * be set when this cache item is chosen.
+ */
+ enum brw_cache_id cache_id;
+ /** 32-bit hash of the key data */
+ GLuint hash;
+ GLuint key_size; /* for variable-sized keys */
+ const void *key;
+ struct brw_winsys_reloc *relocs;
+ GLuint nr_relocs;
+
+ struct brw_winsys_buffer *bo;
+ GLuint data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ struct brw_context *brw;
+ struct brw_winsys_screen *sws;
+
+ struct brw_cache_item **items;
+ GLuint size, n_items;
+
+ enum brw_buffer_type buffer_type;
+
+ GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
+ GLuint aux_size[BRW_MAX_CACHE];
+ char *name[BRW_MAX_CACHE];
+
+
+ /* Record of the last BOs chosen for each cache_id. Used to set
+ * brw->state.dirty.cache when a new cache item is chosen.
+ */
+ struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE];
+};
+
+
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ int (*prepare)( struct brw_context *brw );
+ int (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+struct brw_cached_batch_item {
+ struct header *header;
+ GLuint sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32)
+
+
+
+
+struct brw_vertex_info {
+ GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+struct brw_query_object {
+ /** Doubly linked list of active query objects in the context. */
+ struct brw_query_object *prev, *next;
+
+ /** Last query BO associated with this query. */
+ struct brw_winsys_buffer *bo;
+ /** First index in bo with query data for this object. */
+ int first_index;
+ /** Last index in bo with query data for this object. */
+ int last_index;
+
+ /* Total count of pixels from previous BOs */
+ uint64_t result;
+};
+
+#define CC_RELOC_VP 0
+
+
+/**
+ * brw_context is derived from pipe_context
+ */
+struct brw_context
+{
+ struct pipe_context base;
+ struct brw_chipset chipset;
+
+ struct brw_winsys_screen *sws;
+
+ struct brw_batchbuffer *batch;
+
+ GLuint primitive;
+ GLuint reduced_primitive;
+
+ /* Active state from the state tracker:
+ */
+ struct {
+ struct brw_vertex_shader *vertex_shader;
+ struct brw_fragment_shader *fragment_shader;
+ const struct brw_blend_state *blend;
+ const struct brw_rasterizer_state *rast;
+ const struct brw_depth_stencil_state *zstencil;
+
+ const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
+ unsigned num_samplers;
+
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned num_vertex_elements;
+ unsigned num_textures;
+ unsigned num_vertex_buffers;
+
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_framebuffer_state fb;
+ struct pipe_clip_state ucp;
+ struct pipe_buffer *vertex_constants;
+ struct pipe_buffer *fragment_constants;
+
+ struct brw_blend_constant_color bcc;
+ struct brw_polygon_stipple bps;
+ struct brw_cc_viewport ccv;
+
+ /**
+ * Index buffer for this draw_prims call.
+ *
+ * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+ */
+ struct pipe_buffer *index_buffer;
+ unsigned index_size;
+
+ /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
+ */
+ unsigned min_index;
+ unsigned max_index;
+
+ } curr;
+
+ struct {
+ struct brw_state_flags dirty;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16];
+ int validated_bo_count;
+ } state;
+
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+ struct u_upload_mgr *upload_vertex;
+ struct u_upload_mgr *upload_index;
+
+ /* Information on uploaded vertex buffers:
+ */
+ struct {
+ unsigned stride; /* in bytes between successive vertices */
+ unsigned offset; /* in bytes, of first vertex in bo */
+ unsigned vertex_count; /* count of valid vertices which may be accessed */
+ struct brw_winsys_buffer *bo;
+ } vb[PIPE_MAX_ATTRIBS];
+
+ unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */
+ } vb;
+
+ struct {
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ struct brw_winsys_buffer *bo;
+ unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
+ } ib;
+
+
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ GLuint vsize; /* vertex size plus header in urb registers */
+ GLuint csize; /* constant buffer size in urb registers */
+ GLuint sfsize; /* setup data size in urb registers */
+
+ GLboolean constrained;
+
+ GLuint nr_vs_entries;
+ GLuint nr_gs_entries;
+ GLuint nr_clip_entries;
+ GLuint nr_sf_entries;
+ GLuint nr_cs_entries;
+
+ GLuint vs_start;
+ GLuint gs_start;
+ GLuint clip_start;
+ GLuint sf_start;
+ GLuint cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
+ GLuint clip_start;
+ GLuint clip_size;
+ GLuint vs_start;
+ GLuint vs_size;
+ GLuint total_size;
+
+ struct brw_winsys_buffer *curbe_bo;
+ /** Offset within curbe_bo of space for current curbe entry */
+ GLuint curbe_offset;
+ /** Offset within curbe_bo of space for next curbe entry */
+ GLuint curbe_next_offset;
+
+ GLfloat *last_buf;
+ GLuint last_bufsz;
+ /**
+ * Whether we should create a new bo instead of reusing the old one
+ * (if we just dispatch the batch pointing at the old one.
+ */
+ GLboolean need_new_bo;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ GLboolean prog_active;
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ struct brw_winsys_buffer *vp_bo;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+ struct brw_wm_compile *compile_data;
+
+ /** Input sizes, calculated from active vertex program.
+ * One bit per fragment program input attribute.
+ */
+ /*GLbitfield input_size_masks[4];*/
+
+ /** Array of surface default colors (texture border color) */
+ struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
+
+ GLuint render_surf;
+ GLuint nr_surfaces;
+
+ GLuint max_threads;
+ struct brw_winsys_buffer *scratch_bo;
+
+ GLuint sampler_count;
+ struct brw_winsys_buffer *sampler_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ struct brw_winsys_buffer *bind_bo;
+ struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF];
+
+ struct brw_winsys_buffer *prog_bo;
+ struct brw_winsys_buffer *state_bo;
+ } wm;
+
+
+ struct {
+ struct brw_winsys_buffer *state_bo;
+
+ struct brw_cc_unit_state cc;
+ struct brw_winsys_reloc reloc[1];
+ } cc;
+
+ struct {
+ struct brw_query_object active_head;
+ struct brw_winsys_buffer *bo;
+ int index;
+ GLboolean active;
+ int stats_wm;
+ } query;
+
+ struct {
+ unsigned always_emit_state:1;
+ unsigned always_flush_batch:1;
+ unsigned force_swtnl:1;
+ unsigned no_swtnl:1;
+ } flags;
+
+ /* Used to give every program string a unique id
+ */
+ GLuint program_id;
+};
+
+
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_query(struct brw_context *brw);
+enum pipe_error brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct brw_context *intel);
+
+
+/*======================================================================
+ * brw_pipe_*.c
+ */
+void brw_pipe_blend_init( struct brw_context *brw );
+void brw_pipe_depth_stencil_init( struct brw_context *brw );
+void brw_pipe_framebuffer_init( struct brw_context *brw );
+void brw_pipe_flush_init( struct brw_context *brw );
+void brw_pipe_misc_init( struct brw_context *brw );
+void brw_pipe_query_init( struct brw_context *brw );
+void brw_pipe_rast_init( struct brw_context *brw );
+void brw_pipe_sampler_init( struct brw_context *brw );
+void brw_pipe_shader_init( struct brw_context *brw );
+void brw_pipe_vertex_init( struct brw_context *brw );
+void brw_pipe_clear_init( struct brw_context *brw );
+
+
+void brw_pipe_blend_cleanup( struct brw_context *brw );
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
+void brw_pipe_flush_cleanup( struct brw_context *brw );
+void brw_pipe_misc_cleanup( struct brw_context *brw );
+void brw_pipe_query_cleanup( struct brw_context *brw );
+void brw_pipe_rast_cleanup( struct brw_context *brw );
+void brw_pipe_sampler_cleanup( struct brw_context *brw );
+void brw_pipe_shader_cleanup( struct brw_context *brw );
+void brw_pipe_vertex_cleanup( struct brw_context *brw );
+void brw_pipe_clear_cleanup( struct brw_context *brw );
+
+void brw_hw_cc_init( struct brw_context *brw );
+void brw_hw_cc_cleanup( struct brw_context *brw );
+
+
+
+void brw_context_flush( struct brw_context *brw );
+
+
+/* brw_urb.c
+ */
+int brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct brw_context *
+brw_context( struct pipe_context *ctx )
+{
+ return (struct brw_context *)ctx;
+}
+
+
+#define BRW_IS_965(brw) ((brw)->chipset.is_965)
+#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
+#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)
+
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 00000000000..3f031577d5a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers. We no longer allocatea block of the GRF for
+ * constants. That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ */
+static int calculate_curbe_offsets( struct brw_context *brw )
+{
+ /* CACHE_NEW_WM_PROG */
+ const GLuint nr_fp_regs = brw->wm.prog_data->curb_read_length;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ const GLuint nr_vp_regs = brw->vs.prog_data->curb_read_length;
+ GLuint nr_clip_regs = 0;
+ GLuint total_regs;
+
+ /* PIPE_NEW_CLIP */
+ if (brw->curr.ucp.nr) {
+ GLuint nr_planes = 6 + brw->curr.ucp.nr;
+ nr_clip_regs = (nr_planes * 4 + 15) / 16;
+ }
+
+
+ total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+ /* When this is > 32, want to use a true constant buffer to hold
+ * the extra constants.
+ */
+ assert(total_regs <= 32);
+
+ /* Lazy resize:
+ */
+ if (nr_fp_regs > brw->curbe.wm_size ||
+ nr_vp_regs > brw->curbe.vs_size ||
+ nr_clip_regs != brw->curbe.clip_size ||
+ (total_regs < brw->curbe.total_size / 4 &&
+ brw->curbe.total_size > 16)) {
+
+ GLuint reg = 0;
+
+ /* Calculate a new layout:
+ */
+ reg = 0;
+ brw->curbe.wm_start = reg;
+ brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+ brw->curbe.clip_start = reg;
+ brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+ brw->curbe.vs_start = reg;
+ brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+ brw->curbe.total_size = reg;
+
+ if (BRW_DEBUG & DEBUG_CURBE)
+ debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ brw->curbe.wm_start,
+ brw->curbe.wm_size,
+ brw->curbe.clip_start,
+ brw->curbe.clip_size,
+ brw->curbe.vs_start,
+ brw->curbe.vs_size );
+
+ brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+ }
+
+ return 0;
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+ .dirty = {
+ .mesa = PIPE_NEW_CLIP,
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = CACHE_NEW_WM_PROG
+ },
+ .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation. Multiple
+ * urb entries -> multiple curbes. These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+int brw_upload_cs_urb_state(struct brw_context *brw)
+{
+ struct brw_cs_urb_state cs_urb;
+ memset(&cs_urb, 0, sizeof(cs_urb));
+
+ /* It appears that this is the state packet for the CS unit, ie. the
+ * urb entries detailed here are housed in the CS range from the
+ * URB_FENCE command.
+ */
+ cs_urb.header.opcode = CMD_CS_URB_STATE;
+ cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+
+ /* BRW_NEW_URB_FENCE */
+ cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
+
+ assert(brw->urb.nr_cs_entries);
+ BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+ return 0;
+}
+
+static GLfloat fixed_plane[6][4] = {
+ { 0, 0, -1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, -1, 0, 1 },
+ { 0, 1, 0, 1 },
+ {-1, 0, 0, 1 },
+ { 1, 0, 0, 1 }
+};
+
+/* Upload a new set of constants. Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
+{
+ struct pipe_screen *screen = brw->base.screen;
+ const GLuint sz = brw->curbe.total_size;
+ const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+ enum pipe_error ret;
+ GLfloat *buf;
+ GLuint i;
+
+ if (sz == 0) {
+ if (brw->curbe.last_buf) {
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ brw->curbe.last_bufsz = 0;
+ }
+ return 0;
+ }
+
+ buf = (GLfloat *) CALLOC(bufsz, 1);
+
+ /* fragment shader constants */
+ if (brw->curbe.wm_size) {
+ const struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+ GLuint offset = brw->curbe.wm_start * 16;
+ GLuint nr_immediate, nr_const;
+
+ nr_immediate = fs->immediates.nr;
+ if (nr_immediate) {
+ memcpy(&buf[offset],
+ fs->immediates.data,
+ nr_immediate * 4 * sizeof(float));
+
+ offset += nr_immediate * 4;
+ }
+
+ nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+/* nr_const = brw->wm.prog_data->nr_params; */
+ if (nr_const) {
+ const GLfloat *value = screen->buffer_map( screen,
+ brw->curr.fragment_constants,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ memcpy(&buf[offset], value,
+ nr_const * 4 * sizeof(float));
+
+ screen->buffer_unmap( screen,
+ brw->curr.fragment_constants );
+ }
+ }
+
+
+ /* The clipplanes are actually delivered to both CLIP and VS units.
+ * VS uses them to calculate the outcode bitmasks.
+ */
+ if (brw->curbe.clip_size) {
+ GLuint offset = brw->curbe.clip_start * 16;
+ GLuint j;
+
+ /* If any planes are going this way, send them all this way:
+ */
+ for (i = 0; i < 6; i++) {
+ buf[offset + i * 4 + 0] = fixed_plane[i][0];
+ buf[offset + i * 4 + 1] = fixed_plane[i][1];
+ buf[offset + i * 4 + 2] = fixed_plane[i][2];
+ buf[offset + i * 4 + 3] = fixed_plane[i][3];
+ }
+
+ /* Clip planes:
+ */
+ assert(brw->curr.ucp.nr <= 6);
+ for (j = 0; j < brw->curr.ucp.nr; j++) {
+ buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
+ buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
+ buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
+ buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
+ i++;
+ }
+ }
+
+ /* vertex shader constants */
+ if (brw->curbe.vs_size) {
+ GLuint offset = brw->curbe.vs_start * 16;
+ const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+ GLuint nr_immediate, nr_const;
+
+ nr_immediate = vs->immediates.nr;
+ if (nr_immediate) {
+ memcpy(&buf[offset],
+ vs->immediates.data,
+ nr_immediate * 4 * sizeof(float));
+
+ offset += nr_immediate * 4;
+ }
+
+ nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ if (nr_const) {
+ /* XXX: note that constant buffers are currently *already* in
+ * buffer objects. If we want to keep on putting them into the
+ * curbe, makes sense to treat constbuf's specially with malloc.
+ */
+ const GLfloat *value = screen->buffer_map( screen,
+ brw->curr.vertex_constants,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ /* XXX: what if user's constant buffer is too small?
+ */
+ memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+
+ screen->buffer_unmap( screen, brw->curr.vertex_constants );
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_CURBE) {
+ for (i = 0; i < sz*16; i+=4)
+ debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+ debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ (void *)brw->curbe.last_buf, (void *)buf,
+ bufsz, brw->curbe.last_bufsz,
+ brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+ }
+
+ if (brw->curbe.curbe_bo != NULL &&
+ brw->curbe.last_buf &&
+ bufsz == brw->curbe.last_bufsz &&
+ memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+ /* constants have not changed */
+ FREE(buf);
+ }
+ else {
+ /* constants have changed */
+ FREE(brw->curbe.last_buf);
+
+ brw->curbe.last_buf = buf;
+ brw->curbe.last_bufsz = bufsz;
+
+ if (brw->curbe.curbe_bo != NULL &&
+ (brw->curbe.need_new_bo ||
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+ {
+ bo_reference(&brw->curbe.curbe_bo, NULL);
+ }
+
+ if (brw->curbe.curbe_bo == NULL) {
+ /* Allocate a single page for CURBE entries for this
+ * batchbuffer. They're generally around 64b. We will
+ * discard the curbe buffer after the batch is flushed to
+ * avoid synchronous updates.
+ */
+ ret = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_CURBE,
+ 4096, 1 << 6,
+ &brw->curbe.curbe_bo);
+ if (ret)
+ return ret;
+
+ brw->curbe.curbe_next_offset = 0;
+ }
+
+ brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+ brw->curbe.curbe_next_offset += bufsz;
+ brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
+
+ /* Copy data to the buffer:
+ */
+ brw->sws->bo_subdata(brw->curbe.curbe_bo,
+ BRW_DATA_CONSTANT_BUFFER,
+ brw->curbe.curbe_offset,
+ bufsz,
+ buf,
+ NULL, 0);
+ }
+
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+ /* Because this provokes an action (ie copy the constants into the
+ * URB), it shouldn't be shortcircuited if identical to the
+ * previous time - because eg. the urb destination may have
+ * changed, or the urb contents different to last time.
+ *
+ * Note that the data referred to is actually copied internally,
+ * not just used in place according to passed pointer.
+ *
+ * It appears that the CS unit takes care of using each available
+ * URB entry (Const URB Entry == CURBE) in turn, and issuing
+ * flushes as necessary when doublebuffering of CURBEs isn't
+ * possible.
+ */
+
+ return 0;
+}
+
+static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
+{
+ GLuint sz = brw->curbe.total_size;
+
+ BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+ if (sz == 0) {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+ OUT_RELOC(brw->curbe.curbe_bo,
+ BRW_USAGE_STATE,
+ (sz - 1) + brw->curbe.curbe_offset);
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+const struct brw_tracked_state brw_curbe_buffer = {
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
+ PIPE_NEW_VERTEX_CONSTANTS |
+ PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_BATCH),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .prepare = prepare_curbe_buffer,
+ .emit = emit_curbe_buffer,
+};
+
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
new file mode 100644
index 00000000000..ae8e9254a68
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -0,0 +1,43 @@
+#ifndef BRW_DEBUG_H
+#define BRW_DEBUG_H
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define DEBUG_TEXTURE 0x1
+#define DEBUG_STATE 0x2
+#define DEBUG_IOCTL 0x4
+#define DEBUG_BLIT 0x8
+#define DEBUG_CURBE 0x10
+#define DEBUG_FALLBACKS 0x20
+#define DEBUG_VERBOSE 0x40
+#define DEBUG_BATCH 0x80
+#define DEBUG_PIXEL 0x100
+#define DEBUG_WINSYS 0x200
+#define DEBUG_MIN_URB 0x400
+#define DEBUG_DISASSEM 0x800
+#define DEBUG_unused3 0x1000
+#define DEBUG_SYNC 0x2000
+#define DEBUG_PRIMS 0x4000
+#define DEBUG_VERTS 0x8000
+#define DEBUG_unused4 0x10000
+#define DEBUG_DMA 0x20000
+#define DEBUG_SANITY 0x40000
+#define DEBUG_SLEEP 0x80000
+#define DEBUG_STATS 0x100000
+#define DEBUG_unused5 0x200000
+#define DEBUG_SINGLE_THREAD 0x400000
+#define DEBUG_WM 0x800000
+#define DEBUG_URB 0x1000000
+#define DEBUG_VS 0x2000000
+
+#ifdef DEBUG
+extern int BRW_DEBUG;
+#else
+#define BRW_DEBUG 0
+#endif
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 00000000000..e201ce4d7ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,847 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACEFORMAT_INVALID 0xFFF
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+
+#define CMD_VERTEX_BUFFER 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define CMD_VERTEX_ELEMENT 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS_965 0x780b
+#define CMD_VF_STATISTICS_GM45 0x680b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS 0x790a
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 00000000000..65db27248b1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,922 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "brw_disasm.h"
+#include "brw_structs.h"
+#include "brw_reg.h"
+#include "brw_defines.h"
+
+struct {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+char *dest_condmod[16] = {
+ [0] = NULL
+};
+
+char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+char *compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+};
+
+char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+char *imm_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [5] = "VF",
+ [5] = "V",
+ [7] = "F"
+};
+
+char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+char *target_function[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+ [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_TAN] = "tan",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+};
+
+char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+char *urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+char *urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+char *urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+char *sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+static int column;
+
+static int string (FILE *file, char *string)
+{
+ fputs (string, file);
+ column += strlen (string);
+ return 0;
+}
+
+static int format (FILE *f, char *format, ...)
+{
+ char buf[1024];
+ va_list args;
+ va_start (args, format);
+
+ vsnprintf (buf, sizeof (buf) - 1, format, args);
+ string (f, buf);
+ return 0;
+}
+
+static int newline (FILE *f)
+{
+ putc ('\n', f);
+ column = 0;
+ return 0;
+}
+
+static int pad (FILE *f, int c)
+{
+ do
+ string (f, " ");
+ while (column < c);
+ return 0;
+}
+
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+ if (!ctrl[id]) {
+ fprintf (file, "*** invalid %s value %d ",
+ name, id);
+ return 1;
+ }
+ if (ctrl[id][0])
+ {
+ if (space && *space)
+ string (file, " ");
+ string (file, ctrl[id]);
+ if (space)
+ *space = 1;
+ }
+ return 0;
+}
+
+static int print_opcode (FILE *file, int id)
+{
+ if (!opcode[id].name) {
+ format (file, "*** invalid opcode value %d ", id);
+ return 1;
+ }
+ string (file, opcode[id].name);
+ return 0;
+}
+
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+ int err = 0;
+ if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (_reg_nr & 0xf0) {
+ case BRW_ARF_NULL:
+ string (file, "null");
+ return -1;
+ case BRW_ARF_ADDRESS:
+ format (file, "a%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ format (file, "acc%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK:
+ format (file, "mask%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK_STACK:
+ format (file, "msd%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_STATE:
+ format (file, "sr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_CONTROL:
+ format (file, "cr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_NOTIFICATION_COUNT:
+ format (file, "n%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_IP:
+ string (file, "ip");
+ return -1;
+ break;
+ default:
+ format (file, "ARF%d", _reg_nr);
+ break;
+ }
+ } else {
+ err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+ format (file, "%d", _reg_nr);
+ }
+ return err;
+}
+
+static int dest (FILE *file, const struct brw_instruction *inst)
+{
+ int err = 0;
+
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+ }
+ else
+ {
+ string (file, "g[a0");
+ if (inst->bits1.ia1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ if (inst->bits1.ia1.dest_indirect_offset)
+ format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+ string (file, "]");
+ format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+ }
+ }
+ else
+ {
+ if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da16.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+ }
+ else
+ {
+ err = 1;
+ string (file, "Indirect align16 address mode not supported");
+ }
+ }
+
+ return 0;
+}
+
+static int src_align1_region (FILE *file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+ int err = 0;
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",");
+ err |= control (file, "width", width, _width, NULL);
+ string (file, ",");
+ err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+ string (file, ">");
+ return err;
+}
+
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+ GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, reg_num);
+ if (err == -1)
+ return 0;
+ if (sub_reg_num)
+ format (file, ".%d", sub_reg_num);
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_ia1 (FILE *file,
+ GLuint type,
+ GLuint _reg_file,
+ GLint _addr_imm,
+ GLuint _addr_subreg_nr,
+ GLuint _negate,
+ GLuint __abs,
+ GLuint _addr_mode,
+ GLuint _horiz_stride,
+ GLuint _width,
+ GLuint _vert_stride)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ string (file, "g[a0");
+ if (_addr_subreg_nr)
+ format (file, ".%d", _addr_subreg_nr);
+ if (_addr_imm)
+ format (file, " %d", _addr_imm);
+ string (file, "]");
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_da16 (FILE *file,
+ GLuint _reg_type,
+ GLuint _reg_file,
+ GLuint _vert_stride,
+ GLuint _reg_nr,
+ GLuint _subreg_nr,
+ GLuint __abs,
+ GLuint _negate,
+ GLuint swz_x,
+ GLuint swz_y,
+ GLuint swz_z,
+ GLuint swz_w)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, _reg_nr);
+ if (err == -1)
+ return 0;
+ if (_subreg_nr)
+ format (file, ".%d", _subreg_nr);
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+
+static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
+ switch (type) {
+ case BRW_REGISTER_TYPE_UD:
+ format (file, "0x%08xUD", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ format (file, "%dD", inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UW:
+ format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_W:
+ format (file, "%dW", (int16_t) inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UB:
+ format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_VF:
+ format (file, "Vector Float");
+ break;
+ case BRW_REGISTER_TYPE_V:
+ format (file, "0x%08xV", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_F:
+ format (file, "%-gF", inst->bits3.f);
+ }
+ return 0;
+}
+
+static int src0 (FILE *file, const struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src0_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src0_reg_type,
+ inst->bits1.da1.src0_reg_file,
+ inst->bits2.da1.src0_vert_stride,
+ inst->bits2.da1.src0_width,
+ inst->bits2.da1.src0_horiz_stride,
+ inst->bits2.da1.src0_reg_nr,
+ inst->bits2.da1.src0_subreg_nr,
+ inst->bits2.da1.src0_abs,
+ inst->bits2.da1.src0_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src0_reg_type,
+ inst->bits1.ia1.src0_reg_file,
+ inst->bits2.ia1.src0_indirect_offset,
+ inst->bits2.ia1.src0_subreg_nr,
+ inst->bits2.ia1.src0_negate,
+ inst->bits2.ia1.src0_abs,
+ inst->bits2.ia1.src0_address_mode,
+ inst->bits2.ia1.src0_horiz_stride,
+ inst->bits2.ia1.src0_width,
+ inst->bits2.ia1.src0_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src0_reg_type,
+ inst->bits1.da16.src0_reg_file,
+ inst->bits2.da16.src0_vert_stride,
+ inst->bits2.da16.src0_reg_nr,
+ inst->bits2.da16.src0_subreg_nr,
+ inst->bits2.da16.src0_abs,
+ inst->bits2.da16.src0_negate,
+ inst->bits2.da16.src0_swz_x,
+ inst->bits2.da16.src0_swz_y,
+ inst->bits2.da16.src0_swz_z,
+ inst->bits2.da16.src0_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static int src1 (FILE *file, const struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src1_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src1_reg_type,
+ inst->bits1.da1.src1_reg_file,
+ inst->bits3.da1.src1_vert_stride,
+ inst->bits3.da1.src1_width,
+ inst->bits3.da1.src1_horiz_stride,
+ inst->bits3.da1.src1_reg_nr,
+ inst->bits3.da1.src1_subreg_nr,
+ inst->bits3.da1.src1_abs,
+ inst->bits3.da1.src1_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src1_reg_type,
+ inst->bits1.ia1.src1_reg_file,
+ inst->bits3.ia1.src1_indirect_offset,
+ inst->bits3.ia1.src1_subreg_nr,
+ inst->bits3.ia1.src1_negate,
+ inst->bits3.ia1.src1_abs,
+ inst->bits3.ia1.src1_address_mode,
+ inst->bits3.ia1.src1_horiz_stride,
+ inst->bits3.ia1.src1_width,
+ inst->bits3.ia1.src1_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src1_reg_type,
+ inst->bits1.da16.src1_reg_file,
+ inst->bits3.da16.src1_vert_stride,
+ inst->bits3.da16.src1_reg_nr,
+ inst->bits3.da16.src1_subreg_nr,
+ inst->bits3.da16.src1_abs,
+ inst->bits3.da16.src1_negate,
+ inst->bits3.da16.src1_swz_x,
+ inst->bits3.da16.src1_swz_y,
+ inst->bits3.da16.src1_swz_z,
+ inst->bits3.da16.src1_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0;
+
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) {
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER:
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
+
+
+int brw_disasm (FILE *file,
+ const struct brw_instruction *inst,
+ unsigned count)
+{
+ int i, err;
+
+ for (i = 0; i < count; i++) {
+ err = brw_disasm_insn(stderr, &inst[i]);
+ if (err)
+ return err;
+ }
+
+ fprintf(file, "\n");
+ return 0;
+}
+
diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
new file mode 100644
index 00000000000..ba5b109c483
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef BRW_DISASM_H
+#define BRW_DISASM_H
+
+#include <stdio.h>
+
+struct brw_instruction;
+
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+int brw_disasm (FILE *file,
+ const struct brw_instruction *inst,
+ unsigned count);
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
new file mode 100644
index 00000000000..ea8d39adaf0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -0,0 +1,289 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
+
+#include "brw_batchbuffer.h"
+
+
+static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
+ _3DPRIM_POINTLIST,
+ _3DPRIM_LINELIST,
+ _3DPRIM_LINELOOP,
+ _3DPRIM_LINESTRIP,
+ _3DPRIM_TRILIST,
+ _3DPRIM_TRISTRIP,
+ _3DPRIM_TRIFAN,
+ _3DPRIM_QUADLIST,
+ _3DPRIM_QUADSTRIP,
+ _3DPRIM_POLYGON
+};
+
+
+
+/* When the primitive changes, set a state bit and re-validate. Not
+ * the nicest and would rather deal with this by having all the
+ * programs be immune to the active primitive (ie. cope with all
+ * possibilities). That may not be realistic however.
+ */
+static int brw_set_prim(struct brw_context *brw, unsigned prim )
+{
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ debug_printf("PRIM: %s\n", u_prim_name(prim));
+
+ if (prim != brw->primitive) {
+ unsigned reduced_prim;
+
+ brw->primitive = prim;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+ reduced_prim = u_reduced_prim(prim);
+ if (reduced_prim != brw->reduced_primitive) {
+ brw->reduced_primitive = reduced_prim;
+ brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+ }
+ }
+
+ return prim_to_hw_prim[prim];
+}
+
+
+
+static int brw_emit_prim(struct brw_context *brw,
+ unsigned start,
+ unsigned count,
+ boolean indexed,
+ uint32_t hw_prim)
+{
+ struct brw_3d_primitive prim_packet;
+ int ret;
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ debug_printf("%s start %d count %d indexed %d hw_prim %d\n",
+ __FUNCTION__, start, count, indexed, hw_prim);
+
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim;
+ prim_packet.header.indexed = indexed;
+
+ prim_packet.verts_per_instance = count;
+ prim_packet.start_vert_location = start;
+ if (indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */
+
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the besides the draw code.
+ */
+ if (0) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+ if (prim_packet.verts_per_instance) {
+ ret = brw_batchbuffer_data( brw->batch, &prim_packet,
+ sizeof(prim_packet), LOOP_CLIPRECTS);
+ if (ret)
+ return ret;
+ }
+ if (0) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+
+/* May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ */
+static int
+try_draw_range_elements(struct brw_context *brw,
+ struct pipe_buffer *index_buffer,
+ unsigned hw_prim,
+ unsigned start, unsigned count)
+{
+ int ret;
+
+ ret = brw_validate_state(brw);
+ if (ret)
+ return ret;
+
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ ret = brw->sws->check_aperture_space(brw->sws,
+ brw->state.validated_bos,
+ brw->state.validated_bo_count);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_state(brw);
+ if (ret)
+ return ret;
+
+ ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim);
+ if (ret)
+ return ret;
+
+ if (brw->flags.always_flush_batch)
+ brw_context_flush( brw );
+
+ return 0;
+}
+
+
+static void
+brw_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct brw_context *brw = brw_context(pipe);
+ int ret;
+ uint32_t hw_prim;
+
+ hw_prim = brw_set_prim(brw, mode);
+
+ if (BRW_DEBUG & DEBUG_PRIMS)
+ debug_printf("PRIM: %s start %d count %d index_buffer %p\n",
+ u_prim_name(mode), start, count, (void *)index_buffer);
+
+ /* Potentially trigger upload of new index buffer.
+ *
+ * XXX: do we need to go through state validation to achieve this?
+ * Could just call upload code directly.
+ */
+ if (brw->curr.index_buffer != index_buffer ||
+ brw->curr.index_size != index_size) {
+ pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+ brw->curr.index_size = index_size;
+ brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
+ }
+
+ /* XXX: do we really care?
+ */
+ if (brw->curr.min_index != min_index ||
+ brw->curr.max_index != max_index)
+ {
+ brw->curr.min_index = min_index;
+ brw->curr.max_index = max_index;
+ brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
+ }
+
+
+ /* Make a first attempt at drawing:
+ */
+ ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+
+ /* Otherwise, flush and retry:
+ */
+ if (ret != 0) {
+ brw_context_flush( brw );
+ ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
+ assert(ret == 0);
+ }
+}
+
+static void
+brw_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned mode,
+ unsigned start, unsigned count)
+{
+ brw_draw_range_elements( pipe, index_buffer,
+ index_size,
+ 0, 0xffffffff,
+ mode,
+ start, count );
+}
+
+static void
+brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ brw_draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
+
+
+boolean brw_draw_init( struct brw_context *brw )
+{
+ /* Register our drawing function:
+ */
+ brw->base.draw_arrays = brw_draw_arrays;
+ brw->base.draw_elements = brw_draw_elements;
+ brw->base.draw_range_elements = brw_draw_range_elements;
+
+ /* Create helpers for uploading data in user buffers:
+ */
+ brw->vb.upload_vertex = u_upload_create( brw->base.screen,
+ 128 * 1024,
+ 64,
+ PIPE_BUFFER_USAGE_VERTEX );
+ if (brw->vb.upload_vertex == NULL)
+ return FALSE;
+
+ brw->vb.upload_index = u_upload_create( brw->base.screen,
+ 32 * 1024,
+ 64,
+ PIPE_BUFFER_USAGE_INDEX );
+ if (brw->vb.upload_index == NULL)
+ return FALSE;
+
+ return TRUE;
+}
+
+void brw_draw_cleanup( struct brw_context *brw )
+{
+ u_upload_destroy( brw->vb.upload_vertex );
+ u_upload_destroy( brw->vb.upload_index );
+
+ bo_reference(&brw->ib.bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
new file mode 100644
index 00000000000..8dc5dbce622
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -0,0 +1,39 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "brw_types.h"
+
+struct brw_context;
+
+boolean brw_draw_init( struct brw_context *brw );
+void brw_draw_cleanup( struct brw_context *brw );
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
new file mode 100644
index 00000000000..a27da5f1c17
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -0,0 +1,542 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+
+#include "util/u_upload_mgr.h"
+#include "util/u_math.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+#include "brw_debug.h"
+
+
+
+
+static unsigned brw_translate_surface_format( unsigned id )
+{
+ switch (id) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return BRW_SURFACEFORMAT_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return BRW_SURFACEFORMAT_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return BRW_SURFACEFORMAT_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return BRW_SURFACEFORMAT_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return BRW_SURFACEFORMAT_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return BRW_SURFACEFORMAT_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return BRW_SURFACEFORMAT_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return BRW_SURFACEFORMAT_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return BRW_SURFACEFORMAT_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return BRW_SURFACEFORMAT_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return BRW_SURFACEFORMAT_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return BRW_SURFACEFORMAT_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return BRW_SURFACEFORMAT_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return BRW_SURFACEFORMAT_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static unsigned get_index_type(int type)
+{
+ switch (type) {
+ case 1: return BRW_INDEX_BYTE;
+ case 2: return BRW_INDEX_WORD;
+ case 4: return BRW_INDEX_DWORD;
+ default: assert(0); return 0;
+ }
+}
+
+
+static int brw_prepare_vertices(struct brw_context *brw)
+{
+ unsigned int min_index = brw->curr.min_index;
+ unsigned int max_index = brw->curr.max_index;
+ GLuint i;
+ int ret;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+
+
+ for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+ struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
+ struct brw_winsys_buffer *bo;
+ struct pipe_buffer *upload_buf = NULL;
+ unsigned offset;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
+ __FUNCTION__, i,
+ brw_buffer_is_user_buffer(vb->buffer),
+ vb->buffer_offset,
+ vb->buffer->size,
+ vb->stride);
+
+ if (brw_buffer_is_user_buffer(vb->buffer)) {
+
+ /* XXX: simplify this. Stop the state trackers from generating
+ * zero-stride buffers & have them use additional constants (or
+ * add support for >1 constant buffer) instead.
+ */
+ unsigned size = (vb->stride == 0 ?
+ vb->buffer->size - vb->buffer_offset :
+ MAX2(vb->buffer->size - vb->buffer_offset,
+ vb->stride * (max_index + 1 - min_index)));
+
+ ret = u_upload_buffer( brw->vb.upload_vertex,
+ vb->buffer_offset + min_index * vb->stride,
+ size,
+ vb->buffer,
+ &offset,
+ &upload_buf );
+ if (ret)
+ return ret;
+
+ bo = brw_buffer(upload_buf)->bo;
+
+ assert(offset + size <= bo->size);
+ }
+ else
+ {
+ offset = vb->buffer_offset;
+ bo = brw_buffer(vb->buffer)->bo;
+ }
+
+ assert(offset < bo->size);
+
+ /* Set up post-upload info about this vertex buffer:
+ */
+ brw->vb.vb[i].offset = offset;
+ brw->vb.vb[i].stride = vb->stride;
+ brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
+ 1 :
+ (bo->size - offset) / vb->stride);
+
+ bo_reference( &brw->vb.vb[i].bo, bo );
+
+ /* Don't need to retain this reference. We have a reference on
+ * the underlying winsys buffer:
+ */
+ pipe_buffer_reference( &upload_buf, NULL );
+ }
+
+ brw->vb.nr_vb = i;
+ brw_prepare_query_begin(brw);
+
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ brw_add_validated_bo(brw, brw->vb.vb[i].bo);
+ }
+
+ return 0;
+}
+
+static int brw_emit_vertex_buffers( struct brw_context *brw )
+{
+ int i;
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), just bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_vb == 0) {
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
+
+ return 0;
+ }
+
+ /* Emit VB state packets.
+ */
+ BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+ ((1 + brw->vb.nr_vb * 4) - 2));
+
+ for (i = 0; i < brw->vb.nr_vb; i++) {
+ OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+ BRW_VB0_ACCESS_VERTEXDATA |
+ (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].offset);
+ if (BRW_IS_IGDNG(brw)) {
+ OUT_RELOC(brw->vb.vb[i].bo,
+ BRW_USAGE_VERTEX,
+ brw->vb.vb[i].bo->size - 1);
+ } else
+ OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
+ OUT_BATCH(0); /* Instance data step rate */
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+
+
+
+static int brw_emit_vertex_elements(struct brw_context *brw)
+{
+ GLuint nr = brw->curr.num_vertex_elements;
+ GLuint i;
+
+ brw_emit_query_begin(brw);
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (nr == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return 0;
+ }
+
+ /* Now emit vertex element (VEP) state packets.
+ *
+ */
+ BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
+ for (i = 0; i < nr; i++) {
+ const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
+ uint32_t format = brw_translate_surface_format( input->src_format );
+ uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+ switch (input->nr_components) {
+ case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
+ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
+ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
+ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (format << BRW_VE0_FORMAT_SHIFT) |
+ (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
+
+ if (BRW_IS_IGDNG(brw))
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+ ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+ }
+ ADVANCE_BATCH();
+ return 0;
+}
+
+
+static int brw_emit_vertices( struct brw_context *brw )
+{
+ int ret;
+
+ ret = brw_emit_vertex_buffers( brw );
+ if (ret)
+ return ret;
+
+ ret = brw_emit_vertex_elements( brw );
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+
+const struct brw_tracked_state brw_vertices = {
+ .dirty = {
+ .mesa = (PIPE_NEW_INDEX_RANGE |
+ PIPE_NEW_VERTEX_BUFFER),
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_vertices,
+ .emit = brw_emit_vertices,
+};
+
+
+static int brw_prepare_indices(struct brw_context *brw)
+{
+ struct pipe_buffer *index_buffer = brw->curr.index_buffer;
+ struct pipe_buffer *upload_buf = NULL;
+ struct brw_winsys_buffer *bo = NULL;
+ GLuint offset;
+ GLuint index_size;
+ GLuint ib_size;
+ int ret;
+
+ if (index_buffer == NULL)
+ return 0;
+
+ if (BRW_DEBUG & DEBUG_VERTS)
+ debug_printf("%s: index_size:%d index_buffer->size:%d\n",
+ __FUNCTION__,
+ brw->curr.index_size,
+ brw->curr.index_buffer->size);
+
+ ib_size = index_buffer->size;
+ index_size = brw->curr.index_size;
+
+ /* Turn userbuffer into a proper hardware buffer?
+ */
+ if (brw_buffer_is_user_buffer(index_buffer)) {
+
+ ret = u_upload_buffer( brw->vb.upload_index,
+ 0,
+ ib_size,
+ index_buffer,
+ &offset,
+ &upload_buf );
+ if (ret)
+ return ret;
+
+ bo = brw_buffer(upload_buf)->bo;
+
+ /* XXX: annotate the userbuffer with the upload information so
+ * that successive calls don't get re-uploaded.
+ */
+ }
+ else {
+ bo = brw_buffer(index_buffer)->bo;
+ ib_size = bo->size;
+ offset = 0;
+ }
+
+ /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the
+ * index buffer state when we're just moving the start index of our
+ * drawing.
+ *
+ * In gallium this will happen in the case where successive draw
+ * calls are made with (distinct?) userbuffers, but the upload_mgr
+ * places the data into a single winsys buffer.
+ *
+ * This statechange doesn't raise any state flags and is always
+ * just merged into the final draw packet:
+ */
+ if (1) {
+ assert((offset & (index_size - 1)) == 0);
+ brw->ib.start_vertex_offset = offset / index_size;
+ }
+
+ /* These statechanges trigger a new CMD_INDEX_BUFFER packet:
+ */
+ if (brw->ib.bo != bo ||
+ brw->ib.size != ib_size)
+ {
+ bo_reference(&brw->ib.bo, bo);
+ brw->ib.size = ib_size;
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ }
+
+ pipe_buffer_reference( &upload_buf, NULL );
+ brw_add_validated_bo(brw, brw->ib.bo);
+ return 0;
+}
+
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = PIPE_NEW_INDEX_BUFFER,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+static int brw_emit_index_buffer(struct brw_context *brw)
+{
+ /* Emit the indexbuffer packet:
+ */
+ if (brw->ib.bo)
+ {
+ struct brw_indexbuffer ib;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ib.header.bits.opcode = CMD_INDEX_BUFFER;
+ ib.header.bits.length = sizeof(ib)/4 - 2;
+ ib.header.bits.index_format = get_index_type(brw->ib.size);
+ ib.header.bits.cut_index_enable = 0;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH( ib.header.dword );
+ OUT_RELOC(brw->ib.bo,
+ BRW_USAGE_VERTEX,
+ brw->ib.offset);
+ OUT_RELOC(brw->ib.bo,
+ BRW_USAGE_VERTEX,
+ brw->ib.offset + brw->ib.size - 1);
+ OUT_BATCH( 0 );
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_index_buffer = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
+ .cache = 0,
+ },
+ .emit = brw_emit_index_buffer,
+};
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
new file mode 100644
index 00000000000..a8fcb5f97eb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8? Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ if (value != 0xff) {
+ if (value != p->flag_value) {
+ brw_push_insn_state(p);
+ brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+ p->flag_value = value;
+ brw_pop_insn_state(p);
+ }
+
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+}
+
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+ p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+ p->current->header.destreg__conditionalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+ p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+ p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+ p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+ p->current->header.saturate = value;
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+ assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+ memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->current++;
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+ assert(p->current != p->stack);
+ p->current--;
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+ p->brw = brw;
+ p->nr_insn = 0;
+ p->current = p->stack;
+ memset(p->current, 0, sizeof(p->current[0]));
+
+ /* Some defaults?
+ */
+ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+ brw_set_saturate(p, 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+enum pipe_error brw_get_program( struct brw_compile *p,
+ const GLuint **data,
+ GLuint *sz )
+{
+ GLuint i;
+
+ for (i = 0; i < 8; i++)
+ brw_NOP(p);
+
+ /* Is the generated program malformed for some reason?
+ */
+ if (p->error)
+ return PIPE_ERROR_BAD_INPUT;
+
+ *sz = p->nr_insn * sizeof(struct brw_instruction);
+ *data = (const GLuint *)p->store;
+ return PIPE_OK;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however. Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ */
+struct brw_eu_label
+{
+ GLuint label; /**< the label number */
+ GLuint position; /**< the position of the brw instruction for this label */
+ struct brw_eu_label *next; /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ */
+struct brw_eu_call
+{
+ GLuint call_inst_pos; /**< location of the CAL instruction */
+ GLuint label;
+ struct brw_eu_call *next; /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB.
+ */
+void
+brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
+{
+ struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label);
+ label->label = l;
+ label->position = position;
+ label->next = c->first_label;
+ c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL.
+ */
+void
+brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
+{
+ struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call);
+ call->call_inst_pos = call_pos;
+ call->label = label;
+ call->next = c->first_call;
+ c->first_call = call;
+}
+
+
+/**
+ * Lookup a label, return label's position/offset.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, unsigned l)
+{
+ const struct brw_eu_label *label;
+ for (label = c->first_label; label; label = label->next) {
+ if (l == label->label) {
+ return label->position;
+ }
+ }
+ abort(); /* should never happen */
+ return ~0;
+}
+
+
+/**
+ * When we're done generating code, this function is called to resolve
+ * subroutine calls.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+ const struct brw_eu_call *call;
+
+ for (call = c->first_call; call; call = call->next) {
+ const GLuint sub_loc = brw_lookup_label(c, call->label);
+ struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
+ struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
+ GLint offset = brw_sub_inst - brw_call_inst;
+
+ /* patch brw_inst1 to point to brw_inst2 */
+ brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
+ }
+
+ /* free linked list of calls */
+ {
+ struct brw_eu_call *call, *next;
+ for (call = c->first_call; call; call = next) {
+ next = call->next;
+ FREE(call);
+ }
+ c->first_call = NULL;
+ }
+
+ /* free linked list of labels */
+ {
+ struct brw_eu_label *label, *next;
+ for (label = c->first_label; label; label = next) {
+ next = label->next;
+ FREE(label);
+ }
+ c->first_label = NULL;
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 00000000000..af509b2e5f4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,992 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+#define BRW_WRITEMASK_NONE 0x00
+#define BRW_WRITEMASK_X 0x01
+#define BRW_WRITEMASK_Y 0x02
+#define BRW_WRITEMASK_XY 0x03
+#define BRW_WRITEMASK_Z 0x04
+#define BRW_WRITEMASK_XZ 0x05
+#define BRW_WRITEMASK_YZ 0x06
+#define BRW_WRITEMASK_XYZ 0x07
+#define BRW_WRITEMASK_W 0x08
+#define BRW_WRITEMASK_XW 0x09
+#define BRW_WRITEMASK_YW 0x0A
+#define BRW_WRITEMASK_XYW 0x0B
+#define BRW_WRITEMASK_ZW 0x0C
+#define BRW_WRITEMASK_XZW 0x0D
+#define BRW_WRITEMASK_YZW 0x0E
+#define BRW_WRITEMASK_XYZW 0x0F
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+ GLuint type:4;
+ GLuint file:2;
+ GLuint nr:8;
+ GLuint subnr:5; /* :1 in align16 */
+ GLuint negate:1; /* source only */
+ GLuint abs:1; /* source only */
+ GLuint vstride:4; /* source only */
+ GLuint width:3; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
+ GLuint address_mode:1; /* relative addressing, hopefully! */
+ GLuint pad0:1;
+
+ union {
+ struct {
+ GLuint swizzle:8; /* src only, align16 only */
+ GLuint writemask:4; /* dest only, align16 only */
+ GLint indirect_offset:10; /* relative addressing offset */
+ GLuint pad1:10; /* two dwords total */
+ } bits;
+
+ GLfloat f;
+ GLint d;
+ GLuint ud;
+ } dw1;
+};
+
+
+struct brw_indirect {
+ GLuint addr_subnr:4;
+ GLint addr_offset:10;
+ GLuint pad:18;
+};
+
+
+struct brw_eu_label;
+struct brw_eu_call;
+
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 10000
+
+struct brw_compile {
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ GLuint nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ GLuint flag_value;
+ GLboolean single_program_flow;
+ struct brw_context *brw;
+
+ struct brw_eu_label *first_label; /**< linked list of labels */
+ struct brw_eu_call *first_call; /**< linked list of CALs */
+
+ boolean error;
+};
+
+
+void
+brw_save_label(struct brw_compile *c, unsigned label, GLuint position);
+
+void
+brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos);
+
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
+
+static INLINE int type_sz( GLuint type )
+{
+ switch( type ) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
+ */
+static INLINE struct brw_reg brw_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr,
+ GLuint type,
+ GLuint vstride,
+ GLuint width,
+ GLuint hstride,
+ GLuint swizzle,
+ GLuint writemask )
+{
+ struct brw_reg reg;
+ if (type == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < BRW_MAX_GRF);
+ else if (type == BRW_MESSAGE_REGISTER_FILE)
+ assert(nr < BRW_MAX_MRF);
+ else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_IP);
+
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
+ */
+ reg.dw1.bits.swizzle = swizzle;
+ reg.dw1.bits.writemask = writemask;
+ reg.dw1.bits.indirect_offset = 0;
+ reg.dw1.bits.pad1 = 0;
+ return reg;
+}
+
+/** Construct float[16] register */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[8] register */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[4] register */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[2] register */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ BRW_WRITEMASK_XY);
+}
+
+/** Construct float[1] register */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ BRW_WRITEMASK_X);
+}
+
+
+static INLINE struct brw_reg retype( struct brw_reg reg,
+ GLuint type )
+{
+ reg.type = type;
+ return reg;
+}
+
+static INLINE struct brw_reg suboffset( struct brw_reg reg,
+ GLuint delta )
+{
+ reg.subnr += delta * type_sz(reg.type);
+ return reg;
+}
+
+
+static INLINE struct brw_reg offset( struct brw_reg reg,
+ GLuint delta )
+{
+ reg.nr += delta;
+ return reg;
+}
+
+
+static INLINE struct brw_reg byte_offset( struct brw_reg reg,
+ GLuint bytes )
+{
+ GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ reg.nr = newoffset / REG_SIZE;
+ reg.subnr = newoffset % REG_SIZE;
+ return reg;
+}
+
+
+/** Construct unsigned word[16] register */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[8] register */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[1] register */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+ return brw_reg( BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
+}
+
+/** Construct float immediate register */
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+ imm.dw1.f = f;
+ return imm;
+}
+
+/** Construct integer immediate register */
+static INLINE struct brw_reg brw_imm_d( GLint d )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+ imm.dw1.d = d;
+ return imm;
+}
+
+/** Construct uint immediate register */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+ imm.dw1.ud = ud;
+ return imm;
+}
+
+/** Construct ushort immediate register */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+ imm.dw1.ud = uw | (uw << 16);
+ return imm;
+}
+
+/** Construct short immediate register */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+ imm.dw1.d = w | (w << 16);
+ return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_8;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+/** Construct vector of four 8-bit float values */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
+ GLuint v1,
+ GLuint v2,
+ GLuint v3)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = ((v0 << 0) |
+ (v1 << 8) |
+ (v2 << 16) |
+ (v3 << 24));
+ return imm;
+}
+
+
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
+{
+ return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+/** Construct float[1] general-purpose register */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+ return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+ return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL,
+ 0);
+}
+
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS,
+ subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs:
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ BRW_WRITEMASK_XYZW); /* NOTE! */
+}
+
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR,
+ 0);
+}
+
+
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_FLAG,
+ 0);
+}
+
+
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK,
+ subnr);
+}
+
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+ assert(nr < BRW_MAX_MRF);
+ return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
+ nr,
+ 0);
+}
+
+
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static INLINE GLuint cvt( GLuint val )
+{
+ switch (val) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 3;
+ case 8: return 4;
+ case 16: return 5;
+ case 32: return 6;
+ }
+ return 0;
+}
+
+static INLINE struct brw_reg stride( struct brw_reg reg,
+ GLuint vstride,
+ GLuint width,
+ GLuint hstride )
+{
+ reg.vstride = cvt(vstride);
+ reg.width = cvt(width) - 1;
+ reg.hstride = cvt(hstride);
+ return reg;
+}
+
+
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+ return stride(reg, 16,16,1);
+}
+
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+ return stride(reg, 8,8,1);
+}
+
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+ return stride(reg, 4,4,1);
+}
+
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+ return stride(reg, 2,2,1);
+}
+
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+ return stride(reg, 0,1,0);
+}
+
+
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+ return vec1(suboffset(reg, elt));
+}
+
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+ GLuint x,
+ GLuint y,
+ GLuint z,
+ GLuint w)
+{
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ return reg;
+}
+
+
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
+ GLuint x )
+{
+ return brw_swizzle(reg, x, x, x, x);
+}
+
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
+ GLuint mask )
+{
+ reg.dw1.bits.writemask &= mask;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
+ GLuint mask )
+{
+ reg.dw1.bits.writemask = mask;
+ return reg;
+}
+
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+ reg.negate ^= 1;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+ reg.abs = 1;
+ return reg;
+}
+
+/***********************************************************************
+ */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
+ GLint offset )
+{
+ struct brw_reg reg = brw_vec4_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
+ GLint offset )
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+ return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+ return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
+}
+
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+ return brw_address_reg(ptr.addr_subnr);
+}
+
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+ ptr.addr_offset += offset;
+ return ptr;
+}
+
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+ struct brw_indirect ptr;
+ ptr.addr_subnr = addr_subnr;
+ ptr.addr_offset = offset;
+ ptr.pad = 0;
+ return ptr;
+}
+
+/** Do two brw_regs refer to the same register? */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+ return r1.file == r2.file && r1.nr == r2.nr;
+}
+
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+ return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+
+enum pipe_error brw_get_program( struct brw_compile *p,
+ const GLuint **program,
+ GLuint *sz );
+
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
new file mode 100644
index 00000000000..5989f5a04ee
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -0,0 +1,94 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_eu.h"
+
+void brw_print_reg( struct brw_reg hwreg )
+{
+ static const char *file[] = {
+ "arf",
+ "grf",
+ "msg",
+ "imm"
+ };
+
+ static const char *type[] = {
+ "ud",
+ "d",
+ "uw",
+ "w",
+ "ub",
+ "vf",
+ "hf",
+ "f"
+ };
+
+ debug_printf("%s%s",
+ hwreg.abs ? "abs/" : "",
+ hwreg.negate ? "-" : "");
+
+ if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.nr % 2 == 0 &&
+ hwreg.subnr == 0 &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+ hwreg.width == BRW_WIDTH_8 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* vector register */
+ debug_printf("vec%d", hwreg.nr);
+ }
+ else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+ hwreg.width == BRW_WIDTH_1 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* "scalar" register */
+ debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ }
+ else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+ debug_printf("imm %f", hwreg.dw1.f);
+ }
+ else {
+ debug_printf("%s%d.%d<%d;%d,%d>:%s",
+ file[hwreg.file],
+ hwreg.nr,
+ hwreg.subnr / type_sz(hwreg.type),
+ hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+ 1<<hwreg.width,
+ hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+ type[hwreg.type]);
+ }
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
new file mode 100644
index 00000000000..00d8eaccbc4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -0,0 +1,1433 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+static void guess_execution_size( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ if (reg.width == BRW_WIDTH_8 &&
+ insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ insn->header.execution_size = BRW_EXECUTE_16;
+ else
+ insn->header.execution_size = reg.width; /* note - definitions are compatible */
+}
+
+
+static void brw_set_dest( struct brw_instruction *insn,
+ struct brw_reg dest )
+{
+ if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ }
+ }
+ else {
+ insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ }
+ }
+
+ /* NEW: Set the execution size based on dest.width and
+ * insn->compression_control:
+ */
+ guess_execution_size(insn, dest);
+}
+
+static void brw_set_src0( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.abs;
+ insn->bits2.da1.src0_negate = reg.negate;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+
+ /* Required to set some fields in src1 as well:
+ */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
+ }
+ else
+ {
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+ }
+ else {
+ insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+ }
+ else {
+ insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+ }
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = BRW_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits2.da16.src0_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+
+static void brw_set_math_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint msg_length,
+ GLuint response_length,
+ GLuint function,
+ GLuint integer_type,
+ GLboolean low_precision,
+ GLboolean saturate,
+ GLuint dataType )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.math_igdng.function = function;
+ insn->bits3.math_igdng.int_type = integer_type;
+ insn->bits3.math_igdng.precision = low_precision;
+ insn->bits3.math_igdng.saturate = saturate;
+ insn->bits3.math_igdng.data_type = dataType;
+ insn->bits3.math_igdng.snapshot = 0;
+ insn->bits3.math_igdng.header_present = 0;
+ insn->bits3.math_igdng.response_length = response_length;
+ insn->bits3.math_igdng.msg_length = msg_length;
+ insn->bits3.math_igdng.end_of_thread = 0;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_igdng.end_of_thread = 0;
+ } else {
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+ }
+}
+
+
+static void brw_set_ff_sync_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb_igdng.opcode = 1;
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used;
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
+
+static void brw_set_urb_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.urb_igdng.opcode = 0; /* ? */
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used; /* ? */
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+ }
+}
+
+static void brw_set_dp_write_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_igdng.msg_control = msg_control;
+ insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_igdng.msg_type = msg_type;
+ insn->bits3.dp_write_igdng.send_commit_msg = 0;
+ insn->bits3.dp_write_igdng.header_present = 1;
+ insn->bits3.dp_write_igdng.response_length = response_length;
+ insn->bits3.dp_write_igdng.msg_length = msg_length;
+ insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.dp_write.end_of_thread = end_of_thread;
+ }
+}
+
+static void brw_set_dp_read_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ GLuint response_length,
+ GLuint end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_igdng.msg_control = msg_control;
+ insn->bits3.dp_read_igdng.msg_type = msg_type;
+ insn->bits3.dp_read_igdng.target_cache = target_cache;
+ insn->bits3.dp_read_igdng.header_present = 1;
+ insn->bits3.dp_read_igdng.response_length = response_length;
+ insn->bits3.dp_read_igdng.msg_length = msg_length;
+ insn->bits3.dp_read_igdng.pad1 = 0;
+ insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+ insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+ insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read.pad1 = 0; /*28:30*/
+ insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ }
+}
+
+static void brw_set_sampler_message(struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ assert(eot == 0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+ insn->bits3.sampler_igdng.sampler = sampler;
+ insn->bits3.sampler_igdng.msg_type = msg_type;
+ insn->bits3.sampler_igdng.simd_mode = simd_mode;
+ insn->bits3.sampler_igdng.header_present = header_present;
+ insn->bits3.sampler_igdng.response_length = response_length;
+ insn->bits3.sampler_igdng.msg_length = msg_length;
+ insn->bits3.sampler_igdng.end_of_thread = eot;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_igdng.end_of_thread = eot;
+ } else if (BRW_IS_G4X(brw)) {
+ insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+ insn->bits3.sampler_g4x.sampler = sampler;
+ insn->bits3.sampler_g4x.msg_type = msg_type;
+ insn->bits3.sampler_g4x.response_length = response_length;
+ insn->bits3.sampler_g4x.msg_length = msg_length;
+ insn->bits3.sampler_g4x.end_of_thread = eot;
+ insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ } else {
+ insn->bits3.sampler.binding_table_index = binding_table_index;
+ insn->bits3.sampler.sampler = sampler;
+ insn->bits3.sampler.msg_type = msg_type;
+ insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ insn->bits3.sampler.response_length = response_length;
+ insn->bits3.sampler.msg_length = msg_length;
+ insn->bits3.sampler.end_of_thread = eot;
+ insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ }
+}
+
+
+
+static struct brw_instruction *next_insn( struct brw_compile *p,
+ GLuint opcode )
+{
+ struct brw_instruction *insn;
+
+ if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
+ {
+ if (p->nr_insn)
+ brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+ }
+
+ assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+ insn = &p->store[p->nr_insn++];
+ memcpy(insn, p->current, sizeof(*insn));
+
+ /* Reset this one-shot flag:
+ */
+
+ if (p->current->header.destreg__conditionalmod) {
+ p->current->header.destreg__conditionalmod = 0;
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+
+ insn->header.opcode = opcode;
+ return insn;
+}
+
+
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+ GLuint opcode,
+ struct brw_reg dest,
+ struct brw_reg src )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ return insn;
+}
+
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+ GLuint opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+ return insn;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+ insn->header.execution_size = 1;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevent flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ assert(execute_size == BRW_EXECUTE_1);
+
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ insn->header.predicate_inverse = 1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_IF);
+ }
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.execution_size = execute_size;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn)
+{
+ struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ }
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = if_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ /* Patch the if instruction to point at this instruction.
+ */
+ if (p->single_program_flow) {
+ assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+ if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+ } else {
+ assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+ if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
+ if_insn->bits3.if_else.pop_count = 0;
+ if_insn->bits3.if_else.pad0 = 0;
+ }
+
+ return insn;
+}
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *patch_insn)
+{
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow) {
+ /* In single program flow mode, there's no need to execute an ENDIF,
+ * since we don't need to do any stack operations, and if we're executing
+ * currently, we want to just continue executing.
+ */
+ struct brw_instruction *next = &p->store[p->nr_insn];
+
+ assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+ patch_insn->bits3.ud = (next - patch_insn) * 16;
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = patch_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ assert(patch_insn->bits3.if_else.jump_count == 0);
+
+ /* Patch the if or else instructions to point at this or the next
+ * instruction respectively.
+ */
+ if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+ /* Automagically turn it into an IFF:
+ */
+ patch_insn->header.opcode = BRW_OPCODE_IFF;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+ patch_insn->bits3.if_else.pop_count = 0;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+ patch_insn->bits3.if_else.pop_count = 1;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else {
+ assert(0);
+ }
+
+ /* Also pop item off the stack in the endif instruction:
+ */
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ }
+}
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_BREAK);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+/* DO/WHILE loop:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+ if (p->single_program_flow) {
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_null_reg());
+ brw_set_src0(insn, brw_null_reg());
+ brw_set_src1(insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+
+ return insn;
+ }
+}
+
+
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+
+/* insn->header.mask_control = BRW_MASK_ENABLE; */
+
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn)
+{
+ struct brw_instruction *landing = &p->store[p->nr_insn];
+ GLuint jmpi = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+ assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+ jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditionalmod = conditional;
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+
+/* guess_execution_size(insn, src0); */
+
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision )
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision )
+{
+ struct brw_instruction *insn;
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ GLuint msg_length = 3;
+ struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+
+ brw_set_dp_write_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_length,
+ 0, /* pixel scoreboard */
+ 0, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest); /* UW? */
+ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ 3, /* msg_control (3 means 4 Owords) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 1, /* target cache (render/scratch) */
+ 1, /* msg_length */
+ 2, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * If relAddr is true, we'll do an indirect fetch using the address register.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index )
+{
+ /* XXX: relAddr not implemented */
+ GLuint msg_reg_nr = 1;
+ {
+ struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ brw_MOV(p, b, brw_imm_ud(location));
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ 0, /* msg_control (0 means 1 Oword) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index)
+{
+ GLuint msg_reg_nr = 1;
+
+ assert(oword < 2);
+ /*
+ printf("vs const read msg, location %u, msg_reg_nr %d\n",
+ location, msg_reg_nr);
+ */
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ {
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ oword, /* 0 = lower Oword, 1 = upper Oword */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ GLboolean need_stall = 0;
+
+ if (writemask == 0) {
+ /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != BRW_WRITEMASK_XYZW) {
+ GLuint dst_offset = 0;
+ GLuint i, newmask = 0, len = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ for (; i < 4; i++) {
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ need_stall = 1;
+ /* debug_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ newmask = ~newmask & BRW_WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(p->brw, insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot,
+ header_present,
+ simd_mode);
+ }
+
+ if (need_stall) {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < BRW_MAX_MRF);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_urb_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 00000000000..5405cf17a4e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math( p,
+ dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ src,
+ BRW_MATH_PRECISION_FULL,
+ BRW_MATH_DATA_VECTOR );
+}
+
+
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec4(dst);
+ src = vec4(src);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+ }
+}
+
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec8(dst);
+ src = vec8(src);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count)
+{
+ GLuint i;
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+ brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+ }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec4(dst);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+ }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 00000000000..921b201bae2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+static enum pipe_error compile_gs_prog( struct brw_context *brw,
+ struct brw_gs_prog_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ struct brw_gs_compile c;
+ enum pipe_error ret;
+ const GLuint *program;
+ GLuint program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ c.key = *key;
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.nr_attrs = c.key.nr_attrs;
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Note that primitives which don't require a GS program have
+ * already been weeded out by this stage:
+ */
+ switch (key->primitive) {
+ case PIPE_PRIM_QUADS:
+ brw_gs_quads( &c );
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ brw_gs_quad_strip( &c );
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ brw_gs_lines( &c );
+ break;
+ case PIPE_PRIM_LINES:
+ if (key->hint_gs_always)
+ brw_gs_lines( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ if (key->hint_gs_always)
+ brw_gs_tris( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ case PIPE_PRIM_POINTS:
+ if (key->hint_gs_always)
+ brw_gs_points( &c );
+ else {
+ return PIPE_OK;
+ }
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ /* Upload
+ */
+ ret = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->gs.prog_data,
+ bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static const unsigned gs_prim[PIPE_PRIM_MAX] = {
+ PIPE_PRIM_POINTS,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_LOOP,
+ PIPE_PRIM_LINES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_TRIANGLES,
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_TRIANGLES
+};
+
+static void populate_key( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+
+ memset(key, 0, sizeof(*key));
+
+ /* PIPE_NEW_FRAGMENT_SIGNATURE */
+ key->nr_attrs = sig->nr_inputs + 1;
+
+ /* BRW_NEW_PRIMITIVE */
+ key->primitive = gs_prim[brw->primitive];
+
+ key->hint_gs_always = 0; /* debug code? */
+
+ key->need_gs_prog = (key->hint_gs_always ||
+ brw->primitive == PIPE_PRIM_QUADS ||
+ brw->primitive == PIPE_PRIM_QUAD_STRIP ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP);
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static int prepare_gs_prog(struct brw_context *brw)
+{
+ struct brw_gs_prog_key key;
+ enum pipe_error ret;
+
+ /* Populate the key:
+ */
+ populate_key(brw, &key);
+
+ if (brw->gs.prog_active != key.need_gs_prog) {
+ brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+ brw->gs.prog_active = key.need_gs_prog;
+ }
+
+ if (!brw->gs.prog_active)
+ return PIPE_OK;
+
+ if (brw_search_cache(&brw->cache, BRW_GS_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->gs.prog_data,
+ &brw->gs.prog_bo))
+ return PIPE_OK;
+
+ ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {
+ .dirty = {
+ .mesa = PIPE_NEW_FRAGMENT_SIGNATURE,
+ .brw = BRW_NEW_PRIMITIVE,
+ .cache = 0,
+ },
+ .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 00000000000..6e616dcb875
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+struct brw_gs_prog_key {
+ GLuint nr_attrs:8;
+ GLuint primitive:4;
+ GLuint hint_gs_always:1;
+ GLuint need_gs_prog:1;
+ GLuint pad:18;
+};
+
+struct brw_gs_compile {
+ struct brw_compile func;
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_GS_VERTS];
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ GLuint nr_attrs;
+ GLuint nr_regs;
+ GLuint nr_bytes;
+ GLboolean need_ff_sync;
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 00000000000..fd8e2accedd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,181 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->prog_data.urb_read_length = c->nr_regs;
+ c->prog_data.total_grf = i;
+}
+
+
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+ struct brw_reg vert,
+ GLboolean last,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLboolean allocate = !last;
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+ /* Send each vertex as a seperate write to the urb. This is
+ * different to the concept in brw_sf_emit.c, where subsequent
+ * writes are used to build up a single urb entry. Each of these
+ * writes instantiates a seperate urb entry, and a new one must be
+ * allocated each time.
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response length */
+ allocate ? 0 : 1, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+ struct brw_compile *p = &c->func;
+ brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+ * is the PV for quads, but vertex 0 for polygons:
+ */
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 3);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 2);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 1);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 00000000000..b64ec286cea
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,169 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+struct brw_gs_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+ GLboolean prog_active;
+};
+
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_GS_PROG */
+ key->prog_active = brw->gs.prog_active;
+ if (key->prog_active) {
+ key->total_grf = brw->gs.prog_data->total_grf;
+ key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+ } else {
+ key->total_grf = 1;
+ key->urb_entry_read_length = 1;
+ }
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.clip_start;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_gs_entries;
+ key->urb_size = brw->urb.vsize;
+}
+
+static enum pipe_error
+gs_unit_create_from_key(struct brw_context *brw,
+ struct brw_gs_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_gs_unit_state gs;
+ enum pipe_error ret;
+
+
+ memset(&gs, 0, sizeof(gs));
+
+ /* reloc */
+ gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ gs.thread0.kernel_start_pointer = 0;
+
+ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ gs.thread1.single_program_flow = 1;
+
+ gs.thread3.dispatch_grf_start_reg = 1;
+ gs.thread3.const_urb_entry_read_offset = 0;
+ gs.thread3.const_urb_entry_read_length = 0;
+ gs.thread3.urb_entry_read_offset = 0;
+ gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+ gs.thread4.nr_urb_entries = key->nr_urb_entries;
+ gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+ if (key->nr_urb_entries >= 8)
+ gs.thread4.max_threads = 1;
+ else
+ gs.thread4.max_threads = 0;
+
+ if (BRW_IS_IGDNG(brw))
+ gs.thread4.rendering_enable = 1;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ gs.thread4.stats_enable = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+ key, sizeof(*key),
+ reloc, nr_reloc,
+ &gs, sizeof(gs),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static enum pipe_error prepare_gs_unit(struct brw_context *brw)
+{
+ struct brw_gs_unit_key key;
+ enum pipe_error ret;
+ struct brw_winsys_reloc reloc[1];
+ unsigned nr_reloc = 0;
+ unsigned grf_reg_count;
+
+ gs_unit_populate_key(brw, &key);
+
+ grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+ /* GS program relocation */
+ if (key.prog_active) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_gs_unit_state, thread0),
+ brw->gs.prog_bo);
+ }
+
+ if (brw_search_cache(&brw->cache, BRW_GS_UNIT,
+ &key, sizeof(key),
+ reloc, nr_reloc,
+ NULL,
+ &brw->gs.state_bo))
+ return PIPE_OK;
+
+ ret = gs_unit_create_from_key(brw, &key,
+ reloc, nr_reloc,
+ &brw->gs.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_gs_unit = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_GS_PROG
+ },
+ .prepare = prepare_gs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
new file mode 100644
index 00000000000..e4b24229db3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_debug.h"
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+#include "brw_pipe_rast.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+static int upload_blend_constant_color(struct brw_context *brw)
+{
+ BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc);
+ return 0;
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = {
+ .dirty = {
+ .mesa = PIPE_NEW_BLEND_COLOR,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color
+};
+
+/***********************************************************************
+ * Drawing rectangle - framebuffer dimensions
+ */
+static int upload_drawing_rect(struct brw_context *brw)
+{
+ BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(0);
+ OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) |
+ ((brw->curr.fb.height - 1) << 16));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ return 0;
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+ .dirty = {
+ .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect
+};
+
+
+/***********************************************************************
+ * Binding table pointers
+ */
+
+static int prepare_binding_table_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
+ brw_add_validated_bo(brw, brw->wm.bind_bo);
+ return 0;
+}
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static int upload_binding_table_pointers(struct brw_context *brw)
+{
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo,
+ BRW_USAGE_SAMPLER,
+ 0); /* vs */
+ else
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ OUT_RELOC(brw->wm.bind_bo,
+ BRW_USAGE_SAMPLER,
+ 0); /* wm/ps */
+ ADVANCE_BATCH();
+ return 0;
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SURF_BIND,
+ },
+ .prepare = prepare_binding_table_pointers,
+ .emit = upload_binding_table_pointers,
+};
+
+
+/**********************************************************************
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ */
+static int upload_pipelined_state_pointers(struct brw_context *brw )
+{
+ BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+ OUT_RELOC(brw->vs.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ if (brw->gs.prog_active)
+ OUT_RELOC(brw->gs.state_bo,
+ BRW_USAGE_STATE,
+ 1);
+ else
+ OUT_BATCH(0);
+ OUT_RELOC(brw->clip.state_bo,
+ BRW_USAGE_STATE,
+ 1);
+ OUT_RELOC(brw->sf.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ OUT_RELOC(brw->wm.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ OUT_RELOC(brw->cc.state_bo,
+ BRW_USAGE_STATE,
+ 0);
+ ADVANCE_BATCH();
+
+ brw->state.dirty.brw |= BRW_NEW_PSP;
+ return 0;
+}
+
+
+static int prepare_psp_urb_cbs(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->sf.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+ return 0;
+}
+
+static int upload_psp_urb_cbs(struct brw_context *brw )
+{
+ int ret;
+
+ ret = upload_pipelined_state_pointers(brw);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_urb_fence(brw);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_cs_urb_state(brw);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .prepare = prepare_psp_urb_cbs,
+ .emit = upload_psp_urb_cbs,
+};
+
+
+/***********************************************************************
+ * Depth buffer
+ */
+
+static int prepare_depthbuffer(struct brw_context *brw)
+{
+ struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
+
+ if (zsbuf)
+ brw_add_validated_bo(brw, brw_surface(zsbuf)->bo);
+
+ return 0;
+}
+
+static int emit_depthbuffer(struct brw_context *brw)
+{
+ struct pipe_surface *surface = brw->curr.fb.zsbuf;
+ unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+
+ if (surface == NULL) {
+ BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+ (BRW_SURFACE_NULL << 29));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+ } else {
+ struct brw_winsys_buffer *bo;
+ unsigned int format;
+ unsigned int pitch;
+ unsigned int cpp;
+
+ switch (surface->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ format = BRW_DEPTHFORMAT_D16_UNORM;
+ cpp = 2;
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ cpp = 4;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ format = BRW_DEPTHFORMAT_D32_FLOAT;
+ cpp = 4;
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ bo = brw_surface(surface)->bo;
+ pitch = brw_surface(surface)->pitch;
+
+ BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(((pitch * cpp) - 1) |
+ (format << 18) |
+ (BRW_TILEWALK_YMAJOR << 26) |
+ ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
+ (BRW_SURFACE_2D << 29));
+ OUT_RELOC(bo,
+ BRW_USAGE_DEPTH_BUFFER,
+ surface->offset);
+ OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+ ((pitch - 1) << 6) |
+ ((surface->height - 1) << 19));
+ OUT_BATCH(0);
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+ }
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_depthbuffer = {
+ .dirty = {
+ .mesa = PIPE_NEW_DEPTH_BUFFER,
+ .brw = BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = prepare_depthbuffer,
+ .emit = emit_depthbuffer,
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+static int upload_polygon_stipple(struct brw_context *brw)
+{
+ BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps);
+ return 0;
+}
+
+const struct brw_tracked_state brw_polygon_stipple = {
+ .dirty = {
+ .mesa = PIPE_NEW_POLYGON_STIPPLE,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+static int upload_line_stipple(struct brw_context *brw)
+{
+ const struct brw_line_stipple *bls = &brw->curr.rast->bls;
+ if (bls->header.opcode) {
+ BRW_CACHED_BATCH_STRUCT(brw, bls);
+ }
+ return 0;
+}
+
+const struct brw_tracked_state brw_line_stipple = {
+ .dirty = {
+ .mesa = PIPE_NEW_RAST,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invarient state packets
+ */
+
+static int upload_invarient_state( struct brw_context *brw )
+{
+ {
+ /* 0x61040000 Pipeline Select */
+ /* PipelineSelect : 0 */
+ struct brw_pipeline_select ps;
+
+ memset(&ps, 0, sizeof(ps));
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
+ else
+ ps.header.opcode = CMD_PIPELINE_SELECT_965;
+ ps.header.pipeline_select = 0;
+ BRW_BATCH_STRUCT(brw, &ps);
+ }
+
+ {
+ struct brw_global_depth_offset_clamp gdo;
+ memset(&gdo, 0, sizeof(gdo));
+
+ /* Disable depth offset clamping.
+ */
+ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.length = sizeof(gdo)/4 - 2;
+ gdo.depth_offset_clamp = 0.0;
+
+ BRW_BATCH_STRUCT(brw, &gdo);
+ }
+
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ struct brw_system_instruction_pointer sip;
+ memset(&sip, 0, sizeof(sip));
+
+ sip.header.opcode = CMD_STATE_INSN_POINTER;
+ sip.header.length = 0;
+ sip.bits0.pad = 0;
+ sip.bits0.system_instruction_pointer = 0;
+ BRW_BATCH_STRUCT(brw, &sip);
+ }
+
+ /* VF Statistics */
+ {
+ struct brw_vf_statistics vfs;
+ memset(&vfs, 0, sizeof(vfs));
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ vfs.opcode = CMD_VF_STATISTICS_GM45;
+ else
+ vfs.opcode = CMD_VF_STATISTICS_965;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ vfs.statistics_enable = 1;
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
+
+ if (!BRW_IS_965(brw))
+ {
+ struct brw_aa_line_parameters balp;
+
+ /* use legacy aa line coverage computation */
+ memset(&balp, 0, sizeof(balp));
+ balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+ balp.header.length = sizeof(balp) / 4 - 2;
+
+ BRW_BATCH_STRUCT(brw, &balp);
+ }
+
+ {
+ struct brw_polygon_stipple_offset bpso;
+
+ /* This is invarient state in gallium:
+ */
+ memset(&bpso, 0, sizeof(bpso));
+ bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.length = sizeof(bpso)/4-2;
+ bpso.bits0.y_offset = 0;
+ bpso.bits0.x_offset = 0;
+
+ BRW_BATCH_STRUCT(brw, &bpso);
+ }
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_invarient_state = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_invarient_state
+};
+
+
+/***********************************************************************
+ * State base address
+ */
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static int upload_state_base_address( struct brw_context *brw )
+{
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ */
+ if (BRW_IS_IGDNG(brw)) {
+ BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+ }
+ return 0;
+}
+
+const struct brw_tracked_state brw_state_base_address = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0,
+ },
+ .emit = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
new file mode 100644
index 00000000000..b759a910b63
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -0,0 +1,208 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+static int translate_logicop(unsigned logicop)
+{
+ switch (logicop) {
+ case PIPE_LOGICOP_CLEAR:
+ return BRW_LOGICOPFUNCTION_CLEAR;
+ case PIPE_LOGICOP_AND:
+ return BRW_LOGICOPFUNCTION_AND;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return BRW_LOGICOPFUNCTION_AND_REVERSE;
+ case PIPE_LOGICOP_COPY:
+ return BRW_LOGICOPFUNCTION_COPY;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return BRW_LOGICOPFUNCTION_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return BRW_LOGICOPFUNCTION_AND_INVERTED;
+ case PIPE_LOGICOP_NOOP:
+ return BRW_LOGICOPFUNCTION_NOOP;
+ case PIPE_LOGICOP_XOR:
+ return BRW_LOGICOPFUNCTION_XOR;
+ case PIPE_LOGICOP_OR:
+ return BRW_LOGICOPFUNCTION_OR;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return BRW_LOGICOPFUNCTION_OR_INVERTED;
+ case PIPE_LOGICOP_NOR:
+ return BRW_LOGICOPFUNCTION_NOR;
+ case PIPE_LOGICOP_EQUIV:
+ return BRW_LOGICOPFUNCTION_EQUIV;
+ case PIPE_LOGICOP_INVERT:
+ return BRW_LOGICOPFUNCTION_INVERT;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return BRW_LOGICOPFUNCTION_OR_REVERSE;
+ case PIPE_LOGICOP_NAND:
+ return BRW_LOGICOPFUNCTION_NAND;
+ case PIPE_LOGICOP_SET:
+ return BRW_LOGICOPFUNCTION_SET;
+ default:
+ assert(0);
+ return BRW_LOGICOPFUNCTION_SET;
+ }
+}
+
+
+static unsigned translate_blend_equation( unsigned mode )
+{
+ switch (mode) {
+ case PIPE_BLEND_ADD:
+ return BRW_BLENDFUNCTION_ADD;
+ case PIPE_BLEND_MIN:
+ return BRW_BLENDFUNCTION_MIN;
+ case PIPE_BLEND_MAX:
+ return BRW_BLENDFUNCTION_MAX;
+ case PIPE_BLEND_SUBTRACT:
+ return BRW_BLENDFUNCTION_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+ default:
+ assert(0);
+ return BRW_BLENDFUNCTION_ADD;
+ }
+}
+
+static unsigned translate_blend_factor( unsigned factor )
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return BRW_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return BRW_BLENDFACTOR_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_ONE:
+ return BRW_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return BRW_BLENDFACTOR_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return BRW_BLENDFACTOR_INV_SRC_COLOR;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return BRW_BLENDFACTOR_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return BRW_BLENDFACTOR_INV_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return BRW_BLENDFACTOR_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return BRW_BLENDFACTOR_INV_DST_ALPHA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return BRW_BLENDFACTOR_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return BRW_BLENDFACTOR_INV_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return BRW_BLENDFACTOR_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+ default:
+ assert(0);
+ return BRW_BLENDFACTOR_ZERO;
+ }
+}
+
+static void *brw_create_blend_state( struct pipe_context *pipe,
+ const struct pipe_blend_state *templ )
+{
+ struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state);
+ if (blend == NULL)
+ return NULL;
+
+ if (templ->logicop_enable) {
+ blend->cc2.logicop_enable = 1;
+ blend->cc5.logicop_func = translate_logicop(templ->logicop_func);
+ }
+ else if (templ->blend_enable) {
+ blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor);
+ blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor);
+ blend->cc6.blend_function = translate_blend_equation(templ->rgb_func);
+
+ blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor);
+ blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor);
+ blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func);
+
+ blend->cc3.blend_enable = 1;
+ blend->cc3.ia_blend_enable =
+ (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor ||
+ blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor ||
+ blend->cc6.blend_function != blend->cc5.ia_blend_function);
+
+ /* Per-surface blend enables, currently just follow global
+ * state:
+ */
+ blend->ss0.color_blend = 1;
+ }
+
+ blend->cc5.dither_enable = templ->dither;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ blend->cc5.statistics_enable = 1;
+
+ /* Per-surface color mask -- just follow global state:
+ */
+ blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 0 : 1;
+ blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1;
+ blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 0 : 1;
+ blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1;
+
+ return (void *)blend;
+}
+
+static void brw_bind_blend_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ brw->curr.blend = (const struct brw_blend_state *)cso;
+ brw->state.dirty.mesa |= PIPE_NEW_BLEND;
+}
+
+static void brw_delete_blend_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ assert((const void *)cso != (const void *)brw->curr.blend);
+ FREE(cso);
+}
+
+
+static void brw_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_blend_constant_color *bcc = &brw->curr.bcc;
+
+ bcc->blend_constant_color[0] = blend_color->color[0];
+ bcc->blend_constant_color[1] = blend_color->color[1];
+ bcc->blend_constant_color[2] = blend_color->color[2];
+ bcc->blend_constant_color[3] = blend_color->color[3];
+
+ brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR;
+}
+
+
+void brw_pipe_blend_init( struct brw_context *brw )
+{
+ brw->base.set_blend_color = brw_set_blend_color;
+ brw->base.create_blend_state = brw_create_blend_state;
+ brw->base.bind_blend_state = brw_bind_blend_state;
+ brw->base.delete_blend_state = brw_delete_blend_state;
+
+ {
+ struct brw_blend_constant_color *bcc = &brw->curr.bcc;
+
+ memset(bcc, 0, sizeof(*bcc));
+ bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc->header.length = sizeof(*bcc)/4-2;
+ }
+
+}
+
+void brw_pipe_blend_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
new file mode 100644
index 00000000000..452e1e89f93
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_pack_color.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_screen.h"
+#include "brw_context.h"
+
+#define MASK16 0xffff
+#define MASK24 0xffffff
+
+
+/**
+ * Use blitting to clear the renderbuffers named by 'flags'.
+ * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
+ * since that might include software renderbuffers or renderbuffers
+ * which we're clearing with triangles.
+ * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear
+ */
+static enum pipe_error
+try_clear( struct brw_context *brw,
+ struct brw_surface *surface,
+ unsigned value )
+{
+ uint32_t BR13, CMD;
+ int x1 = 0;
+ int y1 = 0;
+ int x2 = surface->base.width;
+ int y2 = surface->base.height;
+ int pitch = surface->pitch;
+ int cpp = surface->cpp;
+
+ if (x2 == 0 || y2 == 0)
+ return 0;
+
+ debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ (void *)surface->bo, pitch * cpp,
+ surface->base.offset,
+ x1, y1, x2 - x1, y2 - y1);
+
+ BR13 = 0xf0 << 16;
+ CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA;
+
+ /* Setup the blit command */
+ if (cpp == 4) {
+ BR13 |= BR13_8888;
+ CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ }
+ else {
+ assert(cpp == 2);
+ BR13 |= BR13_565;
+ }
+
+ /* XXX: nasty hack for clearing depth buffers
+ */
+ if (surface->tiling == BRW_TILING_Y) {
+ x2 = pitch;
+ }
+
+ if (surface->tiling == BRW_TILING_X) {
+ CMD |= XY_DST_TILED;
+ pitch /= 4;
+ }
+
+ BR13 |= (pitch * cpp);
+
+ BEGIN_BATCH(6, 0);
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13);
+ OUT_BATCH((y1 << 16) | x1);
+ OUT_BATCH((y2 << 16) | x2);
+ OUT_RELOC(surface->bo,
+ BRW_USAGE_BLIT_DEST,
+ surface->base.offset);
+ OUT_BATCH(value);
+ ADVANCE_BATCH();
+
+ return 0;
+}
+
+
+
+
+static void color_clear(struct brw_context *brw,
+ struct brw_surface *bsurface,
+ const float *rgba )
+{
+ enum pipe_error ret;
+ union util_color value;
+
+ util_pack_color( rgba, bsurface->base.format, &value );
+
+ if (bsurface->cpp == 2)
+ value.ui |= value.ui << 16;
+
+ ret = try_clear( brw, bsurface, value.ui );
+
+ if (ret != 0) {
+ brw_context_flush( brw );
+ ret = try_clear( brw, bsurface, value.ui );
+ assert( ret == 0 );
+ }
+}
+
+static void zstencil_clear(struct brw_context *brw,
+ struct brw_surface *bsurface,
+ double depth,
+ unsigned stencil )
+{
+ enum pipe_error ret;
+ unsigned value;
+
+ switch (bsurface->base.format) {
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ value = ((unsigned)(depth * MASK24) & MASK24);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ value = ((unsigned)(depth * MASK16) & MASK16);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ switch (bsurface->base.format) {
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ value = value | (stencil << 24);
+ break;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ value = value | (value << 16);
+ break;
+
+ default:
+ break;
+ }
+
+ ret = try_clear( brw, bsurface, value );
+
+ if (ret != 0) {
+ brw_context_flush( brw );
+ ret = try_clear( brw, bsurface, value );
+ assert( ret == 0 );
+ }
+}
+
+
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ */
+static void brw_clear(struct pipe_context *pipe,
+ unsigned buffers,
+ const float *rgba,
+ double depth,
+ unsigned stencil)
+{
+ struct brw_context *brw = brw_context( pipe );
+ int i;
+
+ if (buffers & PIPE_CLEAR_COLOR) {
+ for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+ color_clear( brw,
+ brw_surface(brw->curr.fb.cbufs[i]),
+ rgba );
+ }
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+ if (brw->curr.fb.zsbuf) {
+ zstencil_clear( brw,
+ brw_surface(brw->curr.fb.zsbuf),
+ depth, stencil );
+ }
+ }
+}
+
+
+void brw_pipe_clear_init( struct brw_context *brw )
+{
+ brw->base.clear = brw_clear;
+}
+
+
+void brw_pipe_clear_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
new file mode 100644
index 00000000000..e010d76e0d3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -0,0 +1,172 @@
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+/* XXX: Fixme - include this to get IZ_ defines
+ */
+#include "brw_wm.h"
+
+static unsigned brw_translate_compare_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return BRW_COMPAREFUNCTION_NEVER;
+ case PIPE_FUNC_LESS:
+ return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_LEQUAL:
+ return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_GREATER:
+ return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_GEQUAL:
+ return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_NOTEQUAL:
+ return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_EQUAL:
+ return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ default:
+ assert(0);
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ }
+}
+
+static unsigned translate_stencil_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return BRW_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return BRW_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return BRW_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return BRW_STENCILOP_INCRSAT;
+ case PIPE_STENCIL_OP_DECR:
+ return BRW_STENCILOP_DECRSAT;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return BRW_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return BRW_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INVERT:
+ return BRW_STENCILOP_INVERT;
+ default:
+ assert(0);
+ return BRW_STENCILOP_ZERO;
+ }
+}
+
+static void create_bcc_state( struct brw_depth_stencil_state *zstencil,
+ const struct pipe_depth_stencil_alpha_state *templ )
+{
+ if (templ->stencil[0].enabled) {
+ zstencil->cc0.stencil_enable = 1;
+ zstencil->cc0.stencil_func =
+ brw_translate_compare_func(templ->stencil[0].func);
+ zstencil->cc0.stencil_fail_op =
+ translate_stencil_op(templ->stencil[0].fail_op);
+ zstencil->cc0.stencil_pass_depth_fail_op =
+ translate_stencil_op(templ->stencil[0].zfail_op);
+ zstencil->cc0.stencil_pass_depth_pass_op =
+ translate_stencil_op(templ->stencil[0].zpass_op);
+ zstencil->cc1.stencil_ref = templ->stencil[0].ref_value;
+ zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask;
+ zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask;
+
+ if (templ->stencil[1].enabled) {
+ zstencil->cc0.bf_stencil_enable = 1;
+ zstencil->cc0.bf_stencil_func =
+ brw_translate_compare_func(templ->stencil[1].func);
+ zstencil->cc0.bf_stencil_fail_op =
+ translate_stencil_op(templ->stencil[1].fail_op);
+ zstencil->cc0.bf_stencil_pass_depth_fail_op =
+ translate_stencil_op(templ->stencil[1].zfail_op);
+ zstencil->cc0.bf_stencil_pass_depth_pass_op =
+ translate_stencil_op(templ->stencil[1].zpass_op);
+ zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value;
+ zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask;
+ zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask;
+ }
+
+ zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask ||
+ zstencil->cc2.bf_stencil_write_mask);
+ }
+
+
+ if (templ->alpha.enabled) {
+ zstencil->cc3.alpha_test = 1;
+ zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func);
+ zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value);
+ }
+
+ if (templ->depth.enabled) {
+ zstencil->cc2.depth_test = 1;
+ zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
+ zstencil->cc2.depth_write_enable = templ->depth.writemask;
+ }
+}
+
+static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil )
+{
+ if (zstencil->cc3.alpha_test)
+ zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (zstencil->cc2.depth_test)
+ zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (zstencil->cc2.depth_write_enable)
+ zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ if (zstencil->cc0.stencil_enable)
+ zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (zstencil->cc0.stencil_write_enable)
+ zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+
+}
+
+
+static void *
+brw_create_depth_stencil_state( struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *templ )
+{
+ struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+ create_bcc_state( zstencil, templ );
+ create_wm_iz_state( zstencil );
+
+ return (void *)zstencil;
+}
+
+
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso;
+ brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ assert((const void *)cso != (const void *)brw->curr.zstencil);
+ FREE(cso);
+}
+
+
+void brw_pipe_depth_stencil_init( struct brw_context *brw )
+{
+ brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+ brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+ brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+}
+
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
new file mode 100644
index 00000000000..5d4e5025f97
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -0,0 +1,84 @@
+#include "util/u_math.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_debug.h"
+
+/**
+ * called from intelDrawBuffer()
+ */
+static void brw_set_framebuffer_state( struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb )
+{
+ struct brw_context *brw = brw_context(pipe);
+ unsigned i;
+
+ /* Dimensions:
+ */
+ if (brw->curr.fb.width != fb->width ||
+ brw->curr.fb.height != fb->height) {
+ brw->curr.fb.width = fb->width;
+ brw->curr.fb.height = fb->height;
+ brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS;
+ }
+
+ /* Z/Stencil
+ */
+ if (brw->curr.fb.zsbuf != fb->zsbuf) {
+ pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf);
+ brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER;
+ }
+
+ /* Color buffers:
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) {
+ brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS;
+ pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]);
+ }
+ }
+
+ if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) {
+ brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs);
+ brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS;
+ }
+}
+
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.viewport = *viewport;
+ brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2];
+ brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2];
+
+ if (0)
+ debug_printf("%s depth range %f .. %f\n",
+ __FUNCTION__,
+ brw->curr.ccv.min_depth,
+ brw->curr.ccv.max_depth);
+
+ brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
+}
+
+
+void brw_pipe_framebuffer_init( struct brw_context *brw )
+{
+ brw->base.set_framebuffer_state = brw_set_framebuffer_state;
+ brw->base.set_viewport_state = brw_set_viewport_state;
+}
+
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
+{
+ struct pipe_framebuffer_state *fb = &brw->curr.fb;
+ int i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&fb->cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&fb->zsbuf, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
new file mode 100644
index 00000000000..fdc4814b221
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -0,0 +1,83 @@
+
+#include "util/u_upload_mgr.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
+
+
+
+/* All batchbuffer flushes must go through this function.
+ */
+void brw_context_flush( struct brw_context *brw )
+{
+ /*
+ *
+ */
+ brw_emit_query_end(brw);
+
+ /* Move to the end of the current upload buffer so that we'll force choosing
+ * a new buffer next time.
+ */
+ u_upload_flush( brw->vb.upload_vertex );
+ u_upload_flush( brw->vb.upload_index );
+
+ _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ );
+
+ /* Mark all context state as needing to be re-emitted.
+ * This is probably not as severe as on 915, since almost all of our state
+ * is just in referenced buffers.
+ */
+ brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+ brw->state.dirty.mesa |= ~0;
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+
+ brw->curbe.need_new_bo = GL_TRUE;
+}
+
+static void
+brw_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ brw_context_flush( brw_context( pipe ) );
+ if (fence)
+ *fence = NULL;
+}
+
+static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
+ struct pipe_buffer *buffer)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_screen *bscreen = brw_screen(brw->base.screen);
+
+ return brw_is_buffer_referenced_by_bo( bscreen,
+ buffer,
+ brw->batch->buf );
+}
+
+static unsigned brw_is_texture_referenced(struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ unsigned face,
+ unsigned level)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_screen *bscreen = brw_screen(brw->base.screen);
+
+ return brw_is_texture_referenced_by_bo( bscreen,
+ texture, face, level,
+ brw->batch->buf );
+}
+
+void brw_pipe_flush_init( struct brw_context *brw )
+{
+ brw->base.flush = brw_flush;
+ brw->base.is_buffer_referenced = brw_is_buffer_referenced;
+ brw->base.is_texture_referenced = brw_is_texture_referenced;
+}
+
+
+void brw_pipe_flush_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
new file mode 100644
index 00000000000..30359078079
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -0,0 +1,54 @@
+
+#include "brw_context.h"
+#include "brw_structs.h"
+#include "brw_defines.h"
+
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stip )
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_polygon_stipple *bps = &brw->curr.bps;
+ GLuint i;
+
+ memset(bps, 0, sizeof *bps);
+ bps->header.opcode = CMD_POLY_STIPPLE_PATTERN;
+ bps->header.length = sizeof *bps/4-2;
+
+ for (i = 0; i < 32; i++)
+ bps->stipple[i] = stip->stipple[i]; /* don't invert */
+
+ brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE;
+}
+
+
+static void brw_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.scissor = *scissor;
+ brw->state.dirty.mesa |= PIPE_NEW_SCISSOR;
+}
+
+
+static void brw_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.ucp = *clip;
+ brw->state.dirty.mesa |= PIPE_NEW_CLIP;
+}
+
+
+void brw_pipe_misc_init( struct brw_context *brw )
+{
+ brw->base.set_polygon_stipple = brw_set_polygon_stipple;
+ brw->base.set_scissor_state = brw_set_scissor_state;
+ brw->base.set_clip_state = brw_set_clip_state;
+}
+
+
+void brw_pipe_misc_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
new file mode 100644
index 00000000000..2eb862635cc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_query_object
+ *
+ * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours. To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once. This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_reg.h"
+
+/** Waits on the query object's BO and totals the results for this query */
+static boolean
+brw_query_get_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Map and count the pixels from the current query BO */
+ if (query->bo) {
+ int i;
+ uint64_t *map;
+
+ if (brw->sws->bo_is_busy(query->bo) && !wait)
+ return FALSE;
+
+ map = bo_map_read(brw->sws, query->bo);
+ if (map == NULL)
+ return FALSE;
+
+ for (i = query->first_index; i <= query->last_index; i++) {
+ query->result += map[i * 2 + 1] - map[i * 2];
+ }
+
+ brw->sws->bo_unmap(query->bo);
+ bo_reference(&query->bo, NULL);
+ }
+
+ *result = query->result;
+ return TRUE;
+}
+
+static struct pipe_query *
+brw_query_create(struct pipe_context *pipe, unsigned type )
+{
+ struct brw_query_object *query;
+
+ switch (type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ query = CALLOC_STRUCT( brw_query_object );
+ if (query == NULL)
+ return NULL;
+ return (struct pipe_query *)query;
+
+ default:
+ return NULL;
+ }
+}
+
+static void
+brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ bo_reference(&query->bo, NULL);
+ FREE(query);
+}
+
+static void
+brw_query_begin(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Reset our driver's tracking of query state. */
+ bo_reference(&query->bo, NULL);
+ query->result = 0;
+ query->first_index = -1;
+ query->last_index = -1;
+
+ insert_at_head(&brw->query.active_head, query);
+ brw->query.stats_wm++;
+ brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+static void
+brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Flush the batchbuffer in case it has writes to our query BO.
+ * Have later queries write to a new query BO so that further rendering
+ * doesn't delay the collection of our results.
+ */
+ if (query->bo) {
+ brw_emit_query_end(brw);
+ brw_context_flush( brw );
+
+ bo_reference(&brw->query.bo, NULL);
+ }
+
+ remove_from_list(query);
+ brw->query.stats_wm--;
+ brw->state.dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+/***********************************************************************
+ * Internal functions and callbacks to implement queries
+ */
+
+/** Called to set up the query BO and account for its aperture space */
+enum pipe_error
+brw_prepare_query_begin(struct brw_context *brw)
+{
+ enum pipe_error ret;
+
+ /* Skip if we're not doing any queries. */
+ if (is_empty_list(&brw->query.active_head))
+ return PIPE_OK;
+
+ /* Get a new query BO if we're going to need it. */
+ if (brw->query.bo == NULL ||
+ brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+
+ ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1,
+ &brw->query.bo);
+ if (ret)
+ return ret;
+
+ brw->query.index = 0;
+ }
+
+ brw_add_validated_bo(brw, brw->query.bo);
+
+ return PIPE_OK;
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+ struct brw_query_object *query;
+
+ /* Skip if we're not doing any queries, or we've emitted the start. */
+ if (brw->query.active || is_empty_list(&brw->query.active_head))
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ /* This object could be mapped cacheable, but we don't have an exposed
+ * mechanism to support that. Since it's going uncached, tell GEM that
+ * we're writing to it. The usual clflush should be all that's required
+ * to pick up the results.
+ */
+ OUT_RELOC(brw->query.bo,
+ BRW_USAGE_QUERY_RESULT,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ foreach(query, &brw->query.active_head) {
+ if (query->bo != brw->query.bo) {
+ uint64_t tmp;
+
+ /* Propogate the results from this buffer to all of the
+ * active queries, as the bo is going away.
+ */
+ if (query->bo != NULL)
+ brw_query_get_result( &brw->base,
+ (struct pipe_query *)query,
+ FALSE,
+ &tmp );
+
+ bo_reference( &query->bo, brw->query.bo );
+ query->first_index = brw->query.index;
+ }
+ query->last_index = brw->query.index;
+ }
+ brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+ if (!brw->query.active)
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ OUT_RELOC(brw->query.bo,
+ BRW_USAGE_QUERY_RESULT,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ brw->query.active = GL_FALSE;
+ brw->query.index++;
+}
+
+void brw_pipe_query_init( struct brw_context *brw )
+{
+ brw->base.create_query = brw_query_create;
+ brw->base.destroy_query = brw_query_destroy;
+ brw->base.begin_query = brw_query_begin;
+ brw->base.end_query = brw_query_end;
+ brw->base.get_query_result = brw_query_get_result;
+}
+
+
+void brw_pipe_query_cleanup( struct brw_context *brw )
+{
+ /* Unreference brw->query.bo ??
+ */
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
new file mode 100644
index 00000000000..2117e91a9e4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -0,0 +1,161 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_pipe_rast.h"
+#include "brw_wm.h"
+
+
+static unsigned translate_fill( unsigned fill )
+{
+ switch (fill) {
+ case PIPE_POLYGON_MODE_FILL:
+ return CLIP_FILL;
+ case PIPE_POLYGON_MODE_LINE:
+ return CLIP_LINE;
+ case PIPE_POLYGON_MODE_POINT:
+ return CLIP_POINT;
+ default:
+ assert(0);
+ return CLIP_FILL;
+ }
+}
+
+
+/* Calculates the key for triangle-mode clipping. Non-triangle
+ * clipping keys use much less information and are computed on the
+ * fly.
+ */
+static void
+calculate_clip_key_rast( const struct brw_context *brw,
+ const struct pipe_rasterizer_state *templ,
+ const struct brw_rasterizer_state *rast,
+ struct brw_clip_prog_key *key)
+{
+ memset(key, 0, sizeof *key);
+
+ if (brw->chipset.is_igdng)
+ key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+ else
+ key->clip_mode = BRW_CLIPMODE_NORMAL;
+
+ key->do_flat_shading = templ->flatshade;
+
+ if (templ->cull_mode == PIPE_WINDING_BOTH) {
+ key->clip_mode = BRW_CLIPMODE_REJECT_ALL;
+ return;
+ }
+
+ key->fill_ccw = CLIP_CULL;
+ key->fill_cw = CLIP_CULL;
+
+ if (!(templ->cull_mode & PIPE_WINDING_CCW)) {
+ key->fill_ccw = translate_fill(templ->fill_ccw);
+ }
+
+ if (!(templ->cull_mode & PIPE_WINDING_CW)) {
+ key->fill_cw = translate_fill(templ->fill_cw);
+ }
+
+ if (key->fill_cw == CLIP_LINE ||
+ key->fill_ccw == CLIP_LINE ||
+ key->fill_cw == CLIP_POINT ||
+ key->fill_ccw == CLIP_POINT) {
+ key->do_unfilled = 1;
+ key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+ }
+
+ key->offset_ccw = templ->offset_ccw;
+ key->offset_cw = templ->offset_cw;
+
+ if (templ->light_twoside && key->fill_cw != CLIP_CULL)
+ key->copy_bfc_cw = 1;
+
+ if (templ->light_twoside && key->fill_ccw != CLIP_CULL)
+ key->copy_bfc_ccw = 1;
+}
+
+
+static void
+calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ,
+ struct brw_line_stipple *bls )
+{
+ GLfloat tmp = 1.0f / (templ->line_stipple_factor + 1);
+ GLint tmpi = tmp * (1<<13);
+
+ bls->header.opcode = CMD_LINE_STIPPLE_PATTERN;
+ bls->header.length = sizeof(*bls)/4 - 2;
+ bls->bits0.pattern = templ->line_stipple_pattern;
+ bls->bits1.repeat_count = templ->line_stipple_factor + 1;
+ bls->bits1.inverse_repeat_count = tmpi;
+}
+
+static void *brw_create_rasterizer_state( struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *templ )
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_rasterizer_state *rast;
+
+ rast = CALLOC_STRUCT(brw_rasterizer_state);
+ if (rast == NULL)
+ return NULL;
+
+ rast->templ = *templ;
+
+ calculate_clip_key_rast( brw, templ, rast, &rast->clip_key );
+
+ if (templ->line_stipple_enable)
+ calculate_line_stipple_rast( templ, &rast->bls );
+
+ /* Caclculate lookup value for WM IZ table.
+ */
+ if (templ->line_smooth) {
+ if (templ->fill_cw == PIPE_POLYGON_MODE_LINE &&
+ templ->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+ rast->unfilled_aa_line = AA_ALWAYS;
+ }
+ else if (templ->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ templ->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+ rast->unfilled_aa_line = AA_SOMETIMES;
+ }
+ else {
+ rast->unfilled_aa_line = AA_NEVER;
+ }
+ }
+ else {
+ rast->unfilled_aa_line = AA_NEVER;
+ }
+
+ return (void *)rast;
+}
+
+
+static void brw_bind_rasterizer_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ brw->curr.rast = (const struct brw_rasterizer_state *)cso;
+ brw->state.dirty.mesa |= PIPE_NEW_RAST;
+}
+
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+ void *cso)
+{
+ struct brw_context *brw = brw_context(pipe);
+ assert((const void *)cso != (const void *)brw->curr.rast);
+ FREE(cso);
+}
+
+
+
+void brw_pipe_rast_init( struct brw_context *brw )
+{
+ brw->base.create_rasterizer_state = brw_create_rasterizer_state;
+ brw->base.bind_rasterizer_state = brw_bind_rasterizer_state;
+ brw->base.delete_rasterizer_state = brw_delete_rasterizer_state;
+}
+
+void brw_pipe_rast_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
new file mode 100644
index 00000000000..9354f01e18a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -0,0 +1,16 @@
+#ifndef BRW_PIPE_RAST_H
+#define BRW_PIPE_RAST_H
+
+#include "brw_clip.h"
+
+struct brw_rasterizer_state {
+ struct pipe_rasterizer_state templ; /* for draw module */
+
+ /* Precalculated hardware state:
+ */
+ struct brw_clip_prog_key clip_key;
+ struct brw_line_stipple bls;
+ unsigned unfilled_aa_line;
+};
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
new file mode 100644
index 00000000000..81712798a5d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -0,0 +1,231 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+
+
+/* The brw (and related graphics cores) do not support GL_CLAMP. The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint translate_wrap_mode( unsigned wrap )
+{
+ switch( wrap ) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return BRW_TEXCOORDMODE_WRAP;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return BRW_TEXCOORDMODE_CLAMP;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return BRW_TEXCOORDMODE_CLAMP_BORDER;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return BRW_TEXCOORDMODE_MIRROR;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return BRW_TEXCOORDMODE_MIRROR_ONCE;
+
+ default:
+ return BRW_TEXCOORDMODE_WRAP;
+ }
+}
+
+static GLuint translate_img_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ return BRW_MAPFILTER_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR:
+ return BRW_MAPFILTER_LINEAR;
+ default:
+ assert(0);
+ return BRW_MAPFILTER_NEAREST;
+ }
+}
+
+static GLuint translate_mip_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ return BRW_MIPFILTER_NONE;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ return BRW_MIPFILTER_NEAREST;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ return BRW_MIPFILTER_LINEAR;
+ default:
+ assert(0);
+ return BRW_MIPFILTER_NONE;
+ }
+}
+
+/* XXX: not sure why there are special translations for the shadow tex
+ * compare functions. In particular ALWAYS is translated to NEVER.
+ * Is this a hardware issue? Does i965 really suffer from this?
+ */
+static GLuint translate_shadow_compare_func( unsigned func )
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER:
+ return BRW_COMPAREFUNCTION_ALWAYS;
+ case PIPE_FUNC_LESS:
+ return BRW_COMPAREFUNCTION_LEQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return BRW_COMPAREFUNCTION_LESS;
+ case PIPE_FUNC_GREATER:
+ return BRW_COMPAREFUNCTION_GEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return BRW_COMPAREFUNCTION_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return BRW_COMPAREFUNCTION_EQUAL;
+ case PIPE_FUNC_EQUAL:
+ return BRW_COMPAREFUNCTION_NOTEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return BRW_COMPAREFUNCTION_NEVER;
+ default:
+ assert(0);
+ return BRW_COMPAREFUNCTION_NEVER;
+ }
+}
+
+
+
+
+static void *
+brw_create_sampler_state( struct pipe_context *pipe,
+ const struct pipe_sampler_state *template )
+{
+ struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler);
+
+ sampler->ss0.min_filter = translate_img_filter( template->min_img_filter );
+ sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter );
+ sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter );
+
+
+ /* XXX: anisotropy logic slightly changed:
+ */
+ if (template->max_anisotropy > 1.0) {
+ sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+ sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+ if (template->max_anisotropy > 2.0) {
+ sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2,
+ BRW_ANISORATIO_16);
+ }
+ }
+
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t);
+
+ /* Set LOD bias:
+ */
+ sampler->ss0.lod_bias =
+ util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6);
+
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ /* Set shadow function:
+ */
+ if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+
+ /* Shadowing is "enabled" by emitting a particular sampler
+ * message (sample_c). So need to recompile WM program when
+ * shadow comparison is enabled on each/any texture unit.
+ */
+ sampler->ss0.shadow_function =
+ translate_shadow_compare_func(template->compare_func);
+ }
+
+ /* Set BaseMipLevel, MaxLOD, MinLOD:
+ */
+ sampler->ss0.base_level =
+ util_unsigned_fixed(0, 1);
+
+ sampler->ss1.max_lod =
+ util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6);
+
+ sampler->ss1.min_lod =
+ util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6);
+
+ return (void *)sampler;
+}
+
+static void brw_bind_sampler_state(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct brw_context *brw = brw_context(pipe);
+ int i;
+
+ for (i = 0; i < num; i++)
+ brw->curr.sampler[i] = sampler[i];
+
+ for (i = num; i < brw->curr.num_samplers; i++)
+ brw->curr.sampler[i] = NULL;
+
+ brw->curr.num_samplers = num;
+ brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS;
+}
+
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+ void *cso)
+{
+ FREE(cso);
+}
+
+static void brw_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct brw_context *brw = brw_context(pipe);
+ int i;
+
+ for (i = 0; i < num; i++)
+ pipe_texture_reference(&brw->curr.texture[i], texture[i]);
+
+ for (i = num; i < brw->curr.num_textures; i++)
+ pipe_texture_reference(&brw->curr.texture[i], NULL);
+
+ brw->curr.num_textures = num;
+ brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
+}
+
+static void brw_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+}
+
+static void brw_bind_vertex_sampler_state(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+}
+
+
+void brw_pipe_sampler_init( struct brw_context *brw )
+{
+ brw->base.create_sampler_state = brw_create_sampler_state;
+ brw->base.delete_sampler_state = brw_delete_sampler_state;
+
+ brw->base.set_fragment_sampler_textures = brw_set_sampler_textures;
+ brw->base.bind_fragment_sampler_states = brw_bind_sampler_state;
+
+ brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures;
+ brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state;
+
+}
+void brw_pipe_sampler_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
new file mode 100644
index 00000000000..bb32d90e331
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -0,0 +1,303 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+
+/**
+ * Determine if the given shader uses complex features such as flow
+ * conditionals, loops, subroutines.
+ */
+static GLboolean has_flow_control(const struct tgsi_shader_info *info)
+{
+ return (info->opcode_count[TGSI_OPCODE_ARL] > 0 ||
+ info->opcode_count[TGSI_OPCODE_IF] > 0 ||
+ info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
+ info->opcode_count[TGSI_OPCODE_CAL] > 0 ||
+ info->opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */
+ info->opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */
+ info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0);
+}
+
+
+static void scan_immediates(const struct tgsi_token *tokens,
+ const struct tgsi_shader_info *info,
+ struct brw_immediate_data *imm)
+{
+ struct tgsi_parse_context parse;
+ boolean done = FALSE;
+
+ imm->nr = 0;
+ imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
+
+ tgsi_parse_init( &parse, tokens );
+ while (!tgsi_parse_end_of_tokens( &parse ) && !done) {
+ tgsi_parse_token( &parse );
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE: {
+ static const float id[4] = {0,0,0,1};
+ const float *value = &parse.FullToken.FullImmediate.u[0].Float;
+ unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ unsigned i;
+
+ for (i = 0; i < size; i++)
+ imm->data[imm->nr][i] = value[i];
+
+ for (; i < 4; i++)
+ imm->data[imm->nr][i] = id[i];
+
+ imm->nr++;
+ break;
+ }
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ done = 1;
+ break;
+ }
+ }
+}
+
+
+static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
+{
+ struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+ struct brw_context *brw = brw_context(pipe);
+
+ if (brw->curr.fragment_shader == fs)
+ return;
+
+ if (brw->curr.fragment_shader == NULL ||
+ fs == NULL ||
+ memcmp(&brw->curr.fragment_shader->signature, &fs->signature,
+ brw_fs_signature_size(&fs->signature)) != 0) {
+ brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE;
+ }
+
+ brw->curr.fragment_shader = fs;
+ brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER;
+}
+
+static void brw_bind_vs_state( struct pipe_context *pipe, void *prog )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ brw->curr.vertex_shader = (struct brw_vertex_shader *)prog;
+ brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER;
+}
+
+
+
+static void *brw_create_fs_state( struct pipe_context *pipe,
+ const struct pipe_shader_state *shader )
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_fragment_shader *fs;
+ int i;
+
+ fs = CALLOC_STRUCT(brw_fragment_shader);
+ if (fs == NULL)
+ return NULL;
+
+ /* Duplicate tokens, scan shader
+ */
+ fs->id = brw->program_id++;
+ fs->has_flow_control = has_flow_control(&fs->info);
+
+ fs->tokens = tgsi_dup_tokens(shader->tokens);
+ if (fs->tokens == NULL)
+ goto fail;
+
+ tgsi_scan_shader(fs->tokens, &fs->info);
+ scan_immediates(fs->tokens, &fs->info, &fs->immediates);
+
+ fs->signature.nr_inputs = fs->info.num_inputs;
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ fs->signature.input[i].interp = fs->info.input_interpolate[i];
+ fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+ fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+ }
+
+ for (i = 0; i < fs->info.num_inputs; i++)
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+ fs->uses_depth = 1;
+
+ if (fs->info.uses_kill)
+ fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fs->info.writes_z)
+ fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ return (void *)fs;
+
+fail:
+ FREE(fs);
+ return NULL;
+}
+
+
+static void *brw_create_vs_state( struct pipe_context *pipe,
+ const struct pipe_shader_state *shader )
+{
+ struct brw_context *brw = brw_context(pipe);
+ struct brw_vertex_shader *vs;
+ unsigned i;
+
+ vs = CALLOC_STRUCT(brw_vertex_shader);
+ if (vs == NULL)
+ return NULL;
+
+ /* Duplicate tokens, scan shader
+ */
+ vs->tokens = tgsi_dup_tokens(shader->tokens);
+ if (vs->tokens == NULL)
+ goto fail;
+
+ tgsi_scan_shader(vs->tokens, &vs->info);
+ scan_immediates(vs->tokens, &vs->info, &vs->immediates);
+
+ vs->id = brw->program_id++;
+ vs->has_flow_control = has_flow_control(&vs->info);
+
+ vs->output_hpos = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_color0 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_color1 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT;
+ vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT;
+
+ for (i = 0; i < vs->info.num_outputs; i++) {
+ int index = vs->info.output_semantic_index[i];
+ switch (vs->info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ vs->output_hpos = i;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (index == 0)
+ vs->output_color0 = i;
+ else
+ vs->output_color1 = i;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (index == 0)
+ vs->output_bfc0 = i;
+ else
+ vs->output_bfc1 = i;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ vs->output_edgeflag = i;
+ break;
+ }
+ }
+
+
+ /* Done:
+ */
+ return (void *)vs;
+
+fail:
+ FREE(vs);
+ return NULL;
+}
+
+
+static void brw_delete_fs_state( struct pipe_context *pipe, void *prog )
+{
+ struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+
+ bo_reference(&fs->const_buffer, NULL);
+ FREE( (void *)fs->tokens );
+ FREE( fs );
+}
+
+
+static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
+{
+ struct brw_fragment_shader *vs = (struct brw_fragment_shader *)prog;
+
+ /* Delete draw shader
+ */
+ FREE( (void *)vs->tokens );
+ FREE( vs );
+}
+
+
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ assert(index == 0);
+
+ if (shader == PIPE_SHADER_FRAGMENT) {
+ pipe_buffer_reference( &brw->curr.fragment_constants,
+ buf->buffer );
+
+ brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS;
+ }
+ else {
+ pipe_buffer_reference( &brw->curr.vertex_constants,
+ buf->buffer );
+
+ brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS;
+ }
+}
+
+
+void brw_pipe_shader_init( struct brw_context *brw )
+{
+ brw->base.set_constant_buffer = brw_set_constant_buffer;
+
+ brw->base.create_vs_state = brw_create_vs_state;
+ brw->base.bind_vs_state = brw_bind_vs_state;
+ brw->base.delete_vs_state = brw_delete_vs_state;
+
+ brw->base.create_fs_state = brw_create_fs_state;
+ brw->base.bind_fs_state = brw_bind_fs_state;
+ brw->base.delete_fs_state = brw_delete_fs_state;
+}
+
+void brw_pipe_shader_cleanup( struct brw_context *brw )
+{
+ pipe_buffer_reference( &brw->curr.fragment_constants, NULL );
+ pipe_buffer_reference( &brw->curr.vertex_constants, NULL );
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
new file mode 100644
index 00000000000..e3c48e31493
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -0,0 +1,71 @@
+#include "brw_context.h"
+
+
+static void brw_set_vertex_elements( struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *elements )
+{
+ struct brw_context *brw = brw_context(pipe);
+
+ memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
+ brw->curr.num_vertex_elements = count;
+
+ brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
+}
+
+
+static void brw_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct brw_context *brw = brw_context(pipe);
+ unsigned i;
+
+ /* Check for no change */
+ if (count == brw->curr.num_vertex_buffers &&
+ memcmp(brw->curr.vertex_buffer,
+ buffers,
+ count * sizeof buffers[0]) == 0)
+ return;
+
+ /* Adjust refcounts */
+ for (i = 0; i < count; i++)
+ pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer,
+ buffers[i].buffer);
+
+ for ( ; i < brw->curr.num_vertex_buffers; i++)
+ pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer,
+ NULL);
+
+ /* Copy remaining data */
+ memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
+ brw->curr.num_vertex_buffers = count;
+
+ brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
+}
+
+
+void
+brw_pipe_vertex_init( struct brw_context *brw )
+{
+ brw->base.set_vertex_buffers = brw_set_vertex_buffers;
+ brw->base.set_vertex_elements = brw_set_vertex_elements;
+}
+
+
+void
+brw_pipe_vertex_cleanup( struct brw_context *brw )
+{
+
+ /* Release bound pipe vertex_buffers
+ */
+
+ /* Release some other stuff
+ */
+#if 0
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ bo_reference(&brw->vb.inputs[i].bo, NULL);
+ brw->vb.inputs[i].bo = NULL;
+ }
+#endif
+}
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
new file mode 100644
index 00000000000..a63403b6afd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -0,0 +1,115 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#define CMD_MI (0x0 << 29)
+#define CMD_2D (0x2 << 29)
+#define CMD_3D (0x3 << 29)
+
+#define MI_NOOP (CMD_MI | 0)
+#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
+#define MI_FLUSH (CMD_MI | (4 << 23))
+
+#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
+ * additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+
+/** @} */
+
+#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6)
+#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4)
+#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA (1 << 21)
+#define XY_BLT_WRITE_RGB (1 << 20)
+#define XY_SRC_TILED (1 << 15)
+#define XY_DST_TILED (1 << 11)
+
+/* BR13 */
+#define BR13_565 (0x1 << 24)
+#define BR13_8888 (0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+
+
+/* PCI IDs
+ */
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I946_GZ 0x2972
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+#define PCI_CHIP_GM45_GM 0x2A42
+
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
+
+struct brw_chipset {
+ unsigned pci_id:16;
+ unsigned is_965:1;
+ unsigned is_igdng:1;
+ unsigned is_g4x:1;
+ unsigned pad:13;
+};
+
+
+/* XXX: hacks
+ */
+#define VERT_RESULT_HPOS 0 /* not always true */
+#define VERT_RESULT_PSIZ 10000 /* disabled */
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
new file mode 100644
index 00000000000..0ecacac9a3a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -0,0 +1,403 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include "brw_reg.h"
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_winsys.h"
+#include "brw_debug.h"
+
+#ifdef DEBUG
+static const struct debug_named_value debug_names[] = {
+ { "tex", DEBUG_TEXTURE},
+ { "state", DEBUG_STATE},
+ { "ioctl", DEBUG_IOCTL},
+ { "blit", DEBUG_BLIT},
+ { "curbe", DEBUG_CURBE},
+ { "fall", DEBUG_FALLBACKS},
+ { "verb", DEBUG_VERBOSE},
+ { "bat", DEBUG_BATCH},
+ { "pix", DEBUG_PIXEL},
+ { "wins", DEBUG_WINSYS},
+ { "min", DEBUG_MIN_URB},
+ { "dis", DEBUG_DISASSEM},
+ { "sync", DEBUG_SYNC},
+ { "prim", DEBUG_PRIMS },
+ { "vert", DEBUG_VERTS },
+ { "dma", DEBUG_DMA },
+ { "san", DEBUG_SANITY },
+ { "sleep", DEBUG_SLEEP },
+ { "stats", DEBUG_STATS },
+ { "sing", DEBUG_SINGLE_THREAD },
+ { "thre", DEBUG_SINGLE_THREAD },
+ { "wm", DEBUG_WM },
+ { "urb", DEBUG_URB },
+ { "vs", DEBUG_VS },
+ { NULL, 0 }
+};
+
+static const struct debug_named_value dump_names[] = {
+ { "asm", DUMP_ASM},
+ { "state", DUMP_STATE},
+ { "batch", DUMP_BATCH},
+ { NULL, 0 }
+};
+
+int BRW_DEBUG = 0;
+int BRW_DUMP = 0;
+
+#endif
+
+
+/*
+ * Probe functions
+ */
+
+
+static const char *
+brw_get_vendor(struct pipe_screen *screen)
+{
+ return "VMware, Inc.";
+}
+
+static const char *
+brw_get_name(struct pipe_screen *screen)
+{
+ static char buffer[128];
+ const char *chipset;
+
+ switch (brw_screen(screen)->chipset.pci_id) {
+ case PCI_CHIP_I965_G:
+ chipset = "I965_G";
+ break;
+ case PCI_CHIP_I965_Q:
+ chipset = "I965_Q";
+ break;
+ case PCI_CHIP_I965_G_1:
+ chipset = "I965_G_1";
+ break;
+ case PCI_CHIP_I946_GZ:
+ chipset = "I946_GZ";
+ break;
+ case PCI_CHIP_I965_GM:
+ chipset = "I965_GM";
+ break;
+ case PCI_CHIP_I965_GME:
+ chipset = "I965_GME";
+ break;
+ case PCI_CHIP_GM45_GM:
+ chipset = "GM45_GM";
+ break;
+ case PCI_CHIP_IGD_E_G:
+ chipset = "IGD_E_G";
+ break;
+ case PCI_CHIP_Q45_G:
+ chipset = "Q45_G";
+ break;
+ case PCI_CHIP_G45_G:
+ chipset = "G45_G";
+ break;
+ case PCI_CHIP_G41_G:
+ chipset = "G41_G";
+ break;
+ case PCI_CHIP_B43_G:
+ chipset = "B43_G";
+ break;
+ case PCI_CHIP_ILD_G:
+ chipset = "ILD_G";
+ break;
+ case PCI_CHIP_ILM_G:
+ chipset = "ILM_G";
+ break;
+ }
+
+ util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
+ return buffer;
+}
+
+static int
+brw_get_param(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 8;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 0;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 0;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 11; /* max 1024x1024 */
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 11; /* max 1024x1024 */
+ default:
+ return 0;
+ }
+}
+
+static float
+brw_get_paramf(struct pipe_screen *screen, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 7.5;
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0;
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 4.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+
+ default:
+ return 0;
+ }
+}
+
+static boolean
+brw_is_format_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags)
+{
+ static const enum pipe_format tex_supported[] = {
+ PIPE_FORMAT_L8_UNORM,
+ PIPE_FORMAT_I8_UNORM,
+ PIPE_FORMAT_A8_UNORM,
+ PIPE_FORMAT_L16_UNORM,
+ /*PIPE_FORMAT_I16_UNORM,*/
+ /*PIPE_FORMAT_A16_UNORM,*/
+ PIPE_FORMAT_A8L8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ PIPE_FORMAT_A1R5G5B5_UNORM,
+ PIPE_FORMAT_A4R4G4B4_UNORM,
+ PIPE_FORMAT_X8R8G8B8_UNORM,
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ /* video */
+ PIPE_FORMAT_YCBCR,
+ PIPE_FORMAT_YCBCR_REV,
+ /* compressed */
+ /*PIPE_FORMAT_FXT1_RGBA,*/
+ PIPE_FORMAT_DXT1_RGB,
+ PIPE_FORMAT_DXT1_RGBA,
+ PIPE_FORMAT_DXT3_RGBA,
+ PIPE_FORMAT_DXT5_RGBA,
+ /* sRGB */
+ PIPE_FORMAT_R8G8B8A8_SRGB,
+ PIPE_FORMAT_A8L8_SRGB,
+ PIPE_FORMAT_L8_SRGB,
+ PIPE_FORMAT_DXT1_SRGB,
+ /* depth */
+ PIPE_FORMAT_Z32_FLOAT,
+ PIPE_FORMAT_X8Z24_UNORM,
+ PIPE_FORMAT_S8Z24_UNORM,
+ PIPE_FORMAT_Z16_UNORM,
+ /* signed */
+ PIPE_FORMAT_R8G8_SNORM,
+ PIPE_FORMAT_R8G8B8A8_SNORM,
+ PIPE_FORMAT_NONE /* list terminator */
+ };
+ static const enum pipe_format render_supported[] = {
+ PIPE_FORMAT_X8R8G8B8_UNORM,
+ PIPE_FORMAT_A8R8G8B8_UNORM,
+ PIPE_FORMAT_R5G6B5_UNORM,
+ PIPE_FORMAT_NONE /* list terminator */
+ };
+ static const enum pipe_format depth_supported[] = {
+ PIPE_FORMAT_Z32_FLOAT,
+ PIPE_FORMAT_X8Z24_UNORM,
+ PIPE_FORMAT_S8Z24_UNORM,
+ PIPE_FORMAT_Z16_UNORM,
+ PIPE_FORMAT_NONE /* list terminator */
+ };
+ const enum pipe_format *list;
+ uint i;
+
+ if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+ list = depth_supported;
+ else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+ list = render_supported;
+ else
+ list = tex_supported;
+
+ for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) {
+ if (list[i] == format)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * Fence functions
+ */
+
+
+static void
+brw_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+}
+
+static int
+brw_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0; /* XXX shouldn't this be a boolean? */
+}
+
+static int
+brw_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ return 0;
+}
+
+
+/*
+ * Generic functions
+ */
+
+
+static void
+brw_destroy_screen(struct pipe_screen *screen)
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+
+ if (bscreen->sws)
+ bscreen->sws->destroy(bscreen->sws);
+
+ FREE(bscreen);
+}
+
+/**
+ * Create a new brw_screen object
+ */
+struct pipe_screen *
+brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
+{
+ struct brw_screen *bscreen;
+ struct brw_chipset chipset;
+
+#ifdef DEBUG
+ BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
+ BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
+ BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM;
+
+ BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
+#endif
+
+ memset(&chipset, 0, sizeof chipset);
+
+ chipset.pci_id = pci_id;
+
+ switch (pci_id) {
+ case PCI_CHIP_I965_G:
+ case PCI_CHIP_I965_Q:
+ case PCI_CHIP_I965_G_1:
+ case PCI_CHIP_I946_GZ:
+ case PCI_CHIP_I965_GM:
+ case PCI_CHIP_I965_GME:
+ chipset.is_965 = TRUE;
+ break;
+
+ case PCI_CHIP_GM45_GM:
+ case PCI_CHIP_IGD_E_G:
+ case PCI_CHIP_Q45_G:
+ case PCI_CHIP_G45_G:
+ case PCI_CHIP_G41_G:
+ case PCI_CHIP_B43_G:
+ chipset.is_g4x = TRUE;
+ break;
+
+ case PCI_CHIP_ILD_G:
+ case PCI_CHIP_ILM_G:
+ chipset.is_igdng = TRUE;
+ break;
+
+ default:
+ debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",
+ __FUNCTION__, pci_id);
+ return NULL;
+ }
+
+
+ bscreen = CALLOC_STRUCT(brw_screen);
+ if (!bscreen)
+ return NULL;
+
+ bscreen->chipset = chipset;
+ bscreen->sws = sws;
+ bscreen->base.winsys = NULL;
+ bscreen->base.destroy = brw_destroy_screen;
+ bscreen->base.get_name = brw_get_name;
+ bscreen->base.get_vendor = brw_get_vendor;
+ bscreen->base.get_param = brw_get_param;
+ bscreen->base.get_paramf = brw_get_paramf;
+ bscreen->base.is_format_supported = brw_is_format_supported;
+ bscreen->base.fence_reference = brw_fence_reference;
+ bscreen->base.fence_signalled = brw_fence_signalled;
+ bscreen->base.fence_finish = brw_fence_finish;
+
+ brw_screen_tex_init(bscreen);
+ brw_screen_tex_surface_init(bscreen);
+ brw_screen_buffer_init(bscreen);
+
+ bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL;
+
+
+ return &bscreen->base;
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
new file mode 100644
index 00000000000..7226d9228b7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -0,0 +1,199 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_SCREEN_H
+#define BRW_SCREEN_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+
+#include "brw_reg.h"
+#include "brw_structs.h"
+
+struct brw_winsys_screen;
+
+
+/**
+ * Subclass of pipe_screen
+ */
+struct brw_screen
+{
+ struct pipe_screen base;
+ struct brw_chipset chipset;
+ struct brw_winsys_screen *sws;
+ boolean no_tiling;
+};
+
+/**
+ * Subclass of pipe_transfer
+ */
+struct brw_transfer
+{
+ struct pipe_transfer base;
+
+ unsigned offset;
+};
+
+struct brw_buffer
+{
+ struct pipe_buffer base;
+
+ /* One of either bo or user_buffer will be non-null, depending on
+ * whether this is a hardware or user buffer.
+ */
+ struct brw_winsys_buffer *bo;
+ void *user_buffer;
+
+ /* Mapped pointer??
+ */
+ void *ptr;
+};
+
+
+union brw_surface_id {
+ struct {
+ unsigned face:3;
+ unsigned zslice:13;
+ unsigned level:16;
+ } bits;
+ unsigned value;
+};
+
+
+struct brw_surface
+{
+ struct pipe_surface base;
+
+ union brw_surface_id id;
+ unsigned cpp;
+ unsigned pitch;
+ unsigned draw_offset;
+ unsigned tiling;
+
+ struct brw_surface_state ss;
+ struct brw_winsys_buffer *bo;
+ struct brw_surface *next, *prev;
+};
+
+
+
+struct brw_texture
+{
+ struct pipe_texture base;
+ struct brw_winsys_buffer *bo;
+ struct brw_surface_state ss;
+
+ unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+ boolean compressed;
+ unsigned brw_target;
+ unsigned pitch;
+ unsigned tiling;
+ unsigned cpp;
+ unsigned total_height;
+
+ struct brw_surface views[2];
+};
+
+
+
+/*
+ * Cast wrappers
+ */
+static INLINE struct brw_screen *
+brw_screen(struct pipe_screen *pscreen)
+{
+ return (struct brw_screen *) pscreen;
+}
+
+static INLINE struct brw_transfer *
+brw_transfer(struct pipe_transfer *transfer)
+{
+ return (struct brw_transfer *)transfer;
+}
+
+static INLINE struct brw_surface *
+brw_surface(struct pipe_surface *surface)
+{
+ return (struct brw_surface *)surface;
+}
+
+static INLINE struct brw_buffer *
+brw_buffer(struct pipe_buffer *buffer)
+{
+ return (struct brw_buffer *)buffer;
+}
+
+static INLINE struct brw_texture *
+brw_texture(struct pipe_texture *texture)
+{
+ return (struct brw_texture *)texture;
+}
+
+
+/* Pipe buffer helpers
+ */
+static INLINE boolean
+brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
+{
+ return ((const struct brw_buffer *)buf)->user_buffer != NULL;
+}
+
+unsigned
+brw_surface_pitch( const struct pipe_surface *surface );
+
+/***********************************************************************
+ * Internal functions
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex );
+
+
+void brw_screen_tex_init( struct brw_screen *brw_screen );
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
+
+void brw_screen_buffer_init(struct brw_screen *brw_screen);
+
+
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_texture *texture,
+ unsigned face,
+ unsigned level,
+ struct brw_winsys_buffer *bo );
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_buffer *buffer,
+ struct brw_winsys_buffer *bo );
+
+
+
+#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
new file mode 100644
index 00000000000..d8141a3f5b9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -0,0 +1,202 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "brw_screen.h"
+#include "brw_winsys.h"
+
+
+
+static void *
+brw_buffer_map_range( struct pipe_screen *screen,
+ struct pipe_buffer *buffer,
+ unsigned offset,
+ unsigned length,
+ unsigned usage )
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_winsys_screen *sws = bscreen->sws;
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ if (buf->user_buffer)
+ return buf->user_buffer;
+
+ return sws->bo_map( buf->bo,
+ BRW_DATA_OTHER,
+ offset,
+ length,
+ (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE,
+ (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE,
+ (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE);
+}
+
+static void *
+brw_buffer_map( struct pipe_screen *screen,
+ struct pipe_buffer *buffer,
+ unsigned usage )
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_winsys_screen *sws = bscreen->sws;
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ if (buf->user_buffer)
+ return buf->user_buffer;
+
+ return sws->bo_map( buf->bo,
+ BRW_DATA_OTHER,
+ 0,
+ buf->base.size,
+ (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE,
+ FALSE,
+ FALSE);
+}
+
+
+static void
+brw_buffer_flush_mapped_range( struct pipe_screen *screen,
+ struct pipe_buffer *buffer,
+ unsigned offset,
+ unsigned length )
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_winsys_screen *sws = bscreen->sws;
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ if (buf->user_buffer)
+ return;
+
+ sws->bo_flush_range( buf->bo,
+ offset,
+ length );
+}
+
+
+static void
+brw_buffer_unmap( struct pipe_screen *screen,
+ struct pipe_buffer *buffer )
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_winsys_screen *sws = bscreen->sws;
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ if (buf->bo)
+ sws->bo_unmap(buf->bo);
+}
+
+static void
+brw_buffer_destroy( struct pipe_buffer *buffer )
+{
+ struct brw_buffer *buf = brw_buffer( buffer );
+
+ assert(!p_atomic_read(&buffer->reference.count));
+
+ bo_reference(&buf->bo, NULL);
+ FREE(buf);
+}
+
+
+static struct pipe_buffer *
+brw_buffer_create(struct pipe_screen *screen,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_winsys_screen *sws = bscreen->sws;
+ struct brw_buffer *buf;
+ unsigned buffer_type;
+ enum pipe_error ret;
+
+ buf = CALLOC_STRUCT(brw_buffer);
+ if (!buf)
+ return NULL;
+
+ pipe_reference_init(&buf->base.reference, 1);
+ buf->base.screen = screen;
+ buf->base.alignment = alignment;
+ buf->base.usage = usage;
+ buf->base.size = size;
+
+ switch (usage & (PIPE_BUFFER_USAGE_VERTEX |
+ PIPE_BUFFER_USAGE_INDEX |
+ PIPE_BUFFER_USAGE_PIXEL |
+ PIPE_BUFFER_USAGE_CONSTANT))
+ {
+ case PIPE_BUFFER_USAGE_VERTEX:
+ case PIPE_BUFFER_USAGE_INDEX:
+ case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX):
+ buffer_type = BRW_BUFFER_TYPE_VERTEX;
+ break;
+
+ case PIPE_BUFFER_USAGE_PIXEL:
+ buffer_type = BRW_BUFFER_TYPE_PIXEL;
+ break;
+
+ case PIPE_BUFFER_USAGE_CONSTANT:
+ buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
+ break;
+
+ default:
+ buffer_type = BRW_BUFFER_TYPE_GENERIC;
+ break;
+ }
+
+ ret = sws->bo_alloc( sws, buffer_type,
+ size, alignment,
+ &buf->bo );
+ if (ret != PIPE_OK)
+ return NULL;
+
+ return &buf->base;
+}
+
+
+static struct pipe_buffer *
+brw_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes)
+{
+ struct brw_buffer *buf;
+
+ buf = CALLOC_STRUCT(brw_buffer);
+ if (!buf)
+ return NULL;
+
+ buf->user_buffer = ptr;
+
+ pipe_reference_init(&buf->base.reference, 1);
+ buf->base.screen = screen;
+ buf->base.alignment = 1;
+ buf->base.usage = 0;
+ buf->base.size = bytes;
+
+ return &buf->base;
+}
+
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_buffer *buffer,
+ struct brw_winsys_buffer *bo )
+{
+ struct brw_buffer *buf = brw_buffer(buffer);
+ if (buf->bo == NULL)
+ return FALSE;
+
+ return brw_screen->sws->bo_references( bo, buf->bo );
+}
+
+
+void brw_screen_buffer_init(struct brw_screen *brw_screen)
+{
+ brw_screen->base.buffer_create = brw_buffer_create;
+ brw_screen->base.user_buffer_create = brw_user_buffer_create;
+ brw_screen->base.buffer_map = brw_buffer_map;
+ brw_screen->base.buffer_map_range = brw_buffer_map_range;
+ brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range;
+ brw_screen->base.buffer_unmap = brw_buffer_unmap;
+ brw_screen->base.buffer_destroy = brw_buffer_destroy;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
new file mode 100644
index 00000000000..e2b9954e596
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_math.h"
+
+#include "pipe/p_screen.h"
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_winsys.h"
+
+enum {
+ BRW_VIEW_LINEAR,
+ BRW_VIEW_IN_PLACE
+};
+
+
+static boolean need_linear_view( struct brw_screen *brw_screen,
+ struct brw_texture *brw_texture,
+ union brw_surface_id id,
+ unsigned usage )
+{
+#if 0
+ /* XXX: what about IDGNG?
+ */
+ if (!BRW_IS_G4X(brw->brw_screen->pci_id))
+ {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+ /* The original gen4 hardware couldn't set up WM surfaces pointing
+ * at an offset within a tile, which can happen when rendering to
+ * anything but the base level of a texture or the +X face/0 depth.
+ * This was fixed with the 4 Series hardware.
+ *
+ * For these original chips, you would have to make the depth and
+ * color destination surfaces include information on the texture
+ * type, LOD, face, and various limits to use them as a destination.
+ *
+ * This is easy in Gallium as surfaces are all backed by
+ * textures, but there's also a nasty requirement that the depth
+ * and the color surfaces all be of the same LOD, which is
+ * harder to get around as we can't look at a surface in
+ * isolation and decide if it's legal.
+ *
+ * Instead, end up being pessimistic and say that for i965,
+ * ... ??
+ */
+ if (brw_tex->tiling != I915_TILING_NONE &&
+ (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) {
+ if (BRW_DEBUG & DEBUG_VIEW)
+ debug_printf("%s: need surface view for non-aligned tex image\n",
+ __FUNCTION__);
+ return GL_TRUE;
+ }
+ }
+#endif
+
+ /* Tiled 3d textures don't have subsets that look like 2d surfaces:
+ */
+
+ /* Everything else should be fine to render to in-place:
+ */
+ return GL_FALSE;
+}
+
+/* Look at all texture views and figure out if any of them need to be
+ * back-copied into the texture for sampling
+ */
+void brw_update_texture( struct brw_screen *brw_screen,
+ struct brw_texture *tex )
+{
+ /* currently nothing to do */
+}
+
+
+/* Create a new surface with linear layout to serve as a render-target
+ * where it would be illegal (perhaps due to tiling constraints) to do
+ * this in-place.
+ *
+ * Currently not implmented, not sure if it's needed.
+ */
+static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
+{
+ return NULL;
+}
+
+
+/* Create a pipe_surface that just points directly into the existing
+ * texture's storage.
+ */
+static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
+ struct brw_texture *tex,
+ union brw_surface_id id,
+ unsigned usage )
+{
+ struct brw_surface *surface;
+
+ surface = CALLOC_STRUCT(brw_surface);
+ if (surface == NULL)
+ return NULL;
+
+ pipe_reference_init(&surface->base.reference, 1);
+
+ /* XXX: ignoring render-to-slice-of-3d-texture
+ */
+ assert(id.bits.zslice == 0);
+
+ surface->base.format = tex->base.format;
+ surface->base.width = u_minify(tex->base.width0, id.bits.level);
+ surface->base.height = u_minify(tex->base.height0, id.bits.level);
+ surface->base.offset = tex->image_offset[id.bits.level][id.bits.face];
+ surface->base.usage = usage;
+ surface->base.zslice = id.bits.zslice;
+ surface->base.face = id.bits.face;
+ surface->base.level = id.bits.level;
+ surface->id = id;
+ surface->cpp = tex->cpp;
+ surface->pitch = tex->pitch;
+ surface->tiling = tex->tiling;
+
+ bo_reference( &surface->bo, tex->bo );
+ pipe_texture_reference( &surface->base.texture, &tex->base );
+
+ surface->ss.ss0.surface_format = tex->ss.ss0.surface_format;
+ surface->ss.ss0.surface_type = BRW_SURFACE_2D;
+
+ if (tex->tiling == BRW_TILING_NONE) {
+ surface->ss.ss1.base_addr = surface->base.offset;
+ } else {
+ uint32_t tile_offset = surface->base.offset % 4096;
+
+ surface->ss.ss1.base_addr = surface->base.offset - tile_offset;
+
+ if (brw_screen->chipset.is_g4x) {
+ if (tex->tiling == BRW_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+ surface->ss.ss5.y_offset = tile_offset / 128 / 2;
+ }
+ }
+ else {
+ assert(tile_offset == 0);
+ }
+ }
+
+#if 0
+ if (region_bo != NULL)
+ surface->ss.ss1.base_addr += region_bo->offset; /* reloc */
+#endif
+
+ surface->ss.ss2.width = surface->base.width - 1;
+ surface->ss.ss2.height = surface->base.height - 1;
+ surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface;
+ surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk;
+ surface->ss.ss3.pitch = tex->ss.ss3.pitch;
+
+ return surface;
+}
+
+/* Get a surface which is view into a texture
+ */
+static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level,
+ unsigned zslice,
+ unsigned usage )
+{
+ struct brw_texture *tex = brw_texture(pt);
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_surface *surface;
+ union brw_surface_id id;
+ int type;
+
+ id.bits.face = face;
+ id.bits.level = level;
+ id.bits.zslice = zslice;
+
+ if (need_linear_view(bscreen, tex, id, usage))
+ type = BRW_VIEW_LINEAR;
+ else
+ type = BRW_VIEW_IN_PLACE;
+
+
+ foreach (surface, &tex->views[type]) {
+ if (id.value == surface->id.value)
+ return &surface->base;
+ }
+
+ switch (type) {
+ case BRW_VIEW_LINEAR:
+ surface = create_linear_view( bscreen, tex, id, usage );
+ break;
+ case BRW_VIEW_IN_PLACE:
+ surface = create_in_place_view( bscreen, tex, id, usage );
+ break;
+ default:
+ return NULL;
+ }
+
+ insert_at_head( &tex->views[type], surface );
+ return &surface->base;
+}
+
+
+static void brw_tex_surface_destroy( struct pipe_surface *surf )
+{
+ struct brw_surface *surface = brw_surface(surf);
+
+ /* Unreference texture, shared buffer:
+ */
+ remove_from_list(surface);
+ bo_reference(&surface->bo, NULL);
+ pipe_texture_reference( &surface->base.texture, NULL );
+
+
+ FREE(surface);
+}
+
+
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen )
+{
+ brw_screen->base.get_tex_surface = brw_get_tex_surface;
+ brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
new file mode 100644
index 00000000000..894f4bea401
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -0,0 +1,414 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+#include "pipe/p_format.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_screen.h"
+#include "brw_debug.h"
+#include "brw_winsys.h"
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+static int
+brw_tex_pitch_align (struct brw_texture *tex,
+ int pitch)
+{
+ if (!tex->compressed) {
+ int pitch_align;
+
+ switch (tex->tiling) {
+ case BRW_TILING_X:
+ pitch_align = 512;
+ break;
+ case BRW_TILING_Y:
+ pitch_align = 128;
+ break;
+ default:
+ /* XXX: Untiled pitch alignment of 64 bytes for now to allow
+ * render-to-texture to work in all cases. This should
+ * probably be replaced at some point by some scheme to only
+ * do this when really necessary, for example standalone
+ * render target views.
+ */
+ pitch_align = 64;
+ break;
+ }
+
+ pitch = align(pitch * tex->cpp, pitch_align);
+ pitch /= tex->cpp;
+ }
+
+ return pitch;
+}
+
+
+static void
+brw_tex_alignment_unit(enum pipe_format pf,
+ GLuint *w, GLuint *h)
+{
+ switch (pf) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
+ *w = 4;
+ *h = 4;
+ break;
+
+ default:
+ *w = 4;
+ *h = 2;
+ break;
+ }
+}
+
+
+static void
+brw_tex_set_level_info(struct brw_texture *tex,
+ GLuint level,
+ GLuint nr_images,
+ GLuint x, GLuint y,
+ GLuint w, GLuint h, GLuint d)
+{
+
+ if (BRW_DEBUG & DEBUG_TEXTURE)
+ debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+ level, w, h, d, x, y, tex->level_offset[level]);
+
+ assert(tex->image_offset[level] == NULL);
+ assert(nr_images >= 1);
+
+ tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
+ tex->nr_images[level] = nr_images;
+
+ tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint));
+ tex->image_offset[level][0] = 0;
+}
+
+
+static void
+brw_tex_set_image_offset(struct brw_texture *tex,
+ GLuint level, GLuint img,
+ GLuint x, GLuint y,
+ GLuint offset)
+{
+ assert((x == 0 && y == 0) || img != 0 || level != 0);
+ assert(img < tex->nr_images[level]);
+
+ if (BRW_DEBUG & DEBUG_TEXTURE)
+ debug_printf("%s level %d img %d pos %d,%d image_offset %x\n",
+ __FUNCTION__, level, img, x, y,
+ tex->image_offset[level][img]);
+
+ tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset;
+}
+
+
+
+static void brw_layout_2d( struct brw_texture *tex )
+{
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+
+ tex->pitch = tex->base.width0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(tex->base.width0, align_w);
+ }
+
+ /* May need to adjust pitch to accomodate the placement of
+ * the 2nd mipmap. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap out past the width of its parent.
+ */
+ if (tex->base.last_level > 0) {
+ GLuint mip1_width;
+
+ if (tex->compressed) {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ align(u_minify(tex->base.width0, 2), align_w));
+ } else {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ u_minify(tex->base.width0, 2));
+ }
+
+ if (mip1_width > tex->pitch) {
+ tex->pitch = mip1_width;
+ }
+ }
+
+ /* Pitch must be a whole number of dwords, even though we
+ * express it in texels.
+ */
+ tex->pitch = brw_tex_pitch_align (tex, tex->pitch);
+ tex->total_height = 0;
+
+ for ( level = 0 ; level <= tex->base.last_level ; level++ ) {
+ GLuint img_height;
+
+ brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1);
+
+ if (tex->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = align(height, align_h);
+
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ tex->total_height = MAX2(tex->total_height, y + img_height);
+
+ /* Layout_below: step right after second mipmap.
+ */
+ if (level == 1) {
+ x += align(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ }
+}
+
+
+static boolean
+brw_layout_cubemap_idgng( struct brw_texture *tex )
+{
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+ GLuint qpitch = 0;
+ GLuint y_pitch = 0;
+
+ tex->pitch = tex->base.width0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+ y_pitch = align(height, align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(tex->base.width0, align_w);
+ }
+
+ if (tex->base.last_level != 0) {
+ GLuint mip1_width;
+
+ if (tex->compressed) {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ align(u_minify(tex->base.width0, 2), align_w));
+ } else {
+ mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+ u_minify(tex->base.width0, 2));
+ }
+
+ if (mip1_width > tex->pitch) {
+ tex->pitch = mip1_width;
+ }
+ }
+
+ tex->pitch = brw_tex_pitch_align(tex, tex->pitch);
+
+ if (tex->compressed) {
+ qpitch = ((y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) / 4) * tex->pitch * tex->cpp;
+
+ tex->total_height = ((y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) / 4) * 6;
+ } else {
+ qpitch = (y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) * tex->pitch * tex->cpp;
+
+ tex->total_height = (y_pitch +
+ align(u_minify(y_pitch, 1), align_h) +
+ 11 * align_h) * 6;
+ }
+
+ for (level = 0; level <= tex->base.last_level; level++) {
+ GLuint img_height;
+ GLuint nr_images = 6;
+ GLuint q = 0;
+
+ brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1);
+
+ for (q = 0; q < nr_images; q++)
+ brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch);
+
+ if (tex->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = align(height, align_h);
+
+ if (level == 1) {
+ x += align(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ }
+
+ return TRUE;
+}
+
+
+static boolean
+brw_layout_3d_cube( struct brw_texture *tex )
+{
+ GLuint width = tex->base.width0;
+ GLuint height = tex->base.height0;
+ GLuint depth = tex->base.depth0;
+ GLuint pack_x_pitch, pack_x_nr;
+ GLuint pack_y_pitch;
+ GLuint level;
+ GLuint align_h = 2;
+ GLuint align_w = 4;
+
+ tex->total_height = 0;
+ brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+ if (tex->compressed) {
+ tex->pitch = align(width, align_w);
+ pack_y_pitch = (height + 3) / 4;
+ } else {
+ tex->pitch = brw_tex_pitch_align(tex, tex->base.width0);
+ pack_y_pitch = align(tex->base.height0, align_h);
+ }
+
+ pack_x_pitch = width;
+ pack_x_nr = 1;
+
+ for (level = 0 ; level <= tex->base.last_level ; level++) {
+ GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? depth : 6;
+ GLint x = 0;
+ GLint y = 0;
+ GLint q, j;
+
+ brw_tex_set_level_info(tex, level, nr_images,
+ 0, tex->total_height,
+ width, height, depth);
+
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ brw_tex_set_image_offset(tex, level, q, x, y, 0);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ tex->total_height += y;
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+
+ if (tex->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > align(width, align_w)) {
+ pack_x_pitch = align(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= tex->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = align(pack_y_pitch, align_h);
+ }
+ }
+ }
+
+ /* The 965's sampler lays cachelines out according to how accesses
+ * in the texture surfaces run, so they may be "vertical" through
+ * memory. As a result, the docs say in Surface Padding Requirements:
+ * Sampling Engine Surfaces that two extra rows of padding are required.
+ */
+ if (tex->base.target == PIPE_TEXTURE_CUBE)
+ tex->total_height += 2;
+
+ return TRUE;
+}
+
+
+
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+ struct brw_texture *tex )
+{
+ switch (tex->base.target) {
+ case PIPE_TEXTURE_CUBE:
+ if (brw_screen->chipset.is_igdng)
+ brw_layout_cubemap_idgng( tex );
+ else
+ brw_layout_3d_cube( tex );
+ break;
+
+ case PIPE_TEXTURE_3D:
+ brw_layout_3d_cube( tex );
+ break;
+
+ default:
+ brw_layout_2d( tex );
+ break;
+ }
+
+ if (BRW_DEBUG & DEBUG_TEXTURE)
+ debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ tex->pitch,
+ tex->total_height,
+ tex->cpp,
+ tex->pitch * tex->total_height * tex->cpp );
+
+ return GL_TRUE;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
new file mode 100644
index 00000000000..feb9d5f7657
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -0,0 +1,573 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_format.h"
+
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+#include "brw_winsys.h"
+
+
+
+static GLuint translate_tex_target( unsigned target )
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ return BRW_SURFACE_1D;
+
+ case PIPE_TEXTURE_2D:
+ return BRW_SURFACE_2D;
+
+ case PIPE_TEXTURE_3D:
+ return BRW_SURFACE_3D;
+
+ case PIPE_TEXTURE_CUBE:
+ return BRW_SURFACE_CUBE;
+
+ default:
+ assert(0);
+ return BRW_SURFACE_1D;
+ }
+}
+
+
+static GLuint translate_tex_format( enum pipe_format pf )
+{
+ switch( pf ) {
+ case PIPE_FORMAT_L8_UNORM:
+ return BRW_SURFACEFORMAT_L8_UNORM;
+
+ case PIPE_FORMAT_I8_UNORM:
+ return BRW_SURFACEFORMAT_I8_UNORM;
+
+ case PIPE_FORMAT_A8_UNORM:
+ return BRW_SURFACEFORMAT_A8_UNORM;
+
+ case PIPE_FORMAT_L16_UNORM:
+ return BRW_SURFACEFORMAT_L16_UNORM;
+
+ /* XXX: Add these to gallium
+ case PIPE_FORMAT_I16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+
+ case PIPE_FORMAT_A16_UNORM:
+ return BRW_SURFACEFORMAT_A16_UNORM;
+ */
+
+ case PIPE_FORMAT_A8L8_UNORM:
+ return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ /*
+ * Video formats
+ */
+
+ case PIPE_FORMAT_YCBCR_REV:
+ return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ case PIPE_FORMAT_YCBCR:
+ return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+ /*
+ * Compressed formats.
+ */
+ /* XXX: Add FXT to gallium?
+ case PIPE_FORMAT_FXT1_RGBA:
+ return BRW_SURFACEFORMAT_FXT1;
+ */
+
+ case PIPE_FORMAT_DXT1_RGB:
+ return BRW_SURFACEFORMAT_DXT1_RGB;
+
+ case PIPE_FORMAT_DXT1_RGBA:
+ return BRW_SURFACEFORMAT_BC1_UNORM;
+
+ case PIPE_FORMAT_DXT3_RGBA:
+ return BRW_SURFACEFORMAT_BC2_UNORM;
+
+ case PIPE_FORMAT_DXT5_RGBA:
+ return BRW_SURFACEFORMAT_BC3_UNORM;
+
+ /*
+ * sRGB formats
+ */
+
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+ case PIPE_FORMAT_A8L8_SRGB:
+ return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+ case PIPE_FORMAT_L8_SRGB:
+ return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+ case PIPE_FORMAT_DXT1_SRGB:
+ return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+ /*
+ * Depth formats
+ */
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return BRW_SURFACEFORMAT_I16_UNORM;
+
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ return BRW_SURFACEFORMAT_I24X8_UNORM;
+
+ case PIPE_FORMAT_Z32_FLOAT:
+ return BRW_SURFACEFORMAT_I32_FLOAT;
+
+ /* XXX: presumably for bump mapping. Add this to mesa state
+ * tracker?
+ *
+ * XXX: Add flipped versions of these formats to Gallium.
+ */
+ case PIPE_FORMAT_R8G8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ default:
+ return BRW_SURFACEFORMAT_INVALID;
+ }
+}
+
+
+
+
+
+static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
+ const struct pipe_texture *templ )
+
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_texture *tex;
+ enum brw_buffer_type buffer_type;
+ enum pipe_error ret;
+
+ tex = CALLOC_STRUCT(brw_texture);
+ if (tex == NULL)
+ return NULL;
+
+ memcpy(&tex->base, templ, sizeof *templ);
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ /* XXX: compressed textures need special treatment here
+ */
+ tex->cpp = util_format_get_blocksize(tex->base.format);
+ tex->compressed = util_format_is_compressed(tex->base.format);
+
+ make_empty_list(&tex->views[0]);
+ make_empty_list(&tex->views[1]);
+
+ /* XXX: No tiling with compressed textures??
+ */
+ if (tex->compressed == 0 &&
+ !bscreen->no_tiling)
+ {
+ if (bscreen->chipset.is_965 &&
+ util_format_is_depth_or_stencil(templ->format))
+ tex->tiling = BRW_TILING_Y;
+ else
+ tex->tiling = BRW_TILING_X;
+ }
+ else {
+ tex->tiling = BRW_TILING_NONE;
+ }
+
+
+
+
+ if (!brw_texture_layout( bscreen, tex ))
+ goto fail;
+
+
+ if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+ PIPE_TEXTURE_USAGE_PRIMARY)) {
+ buffer_type = BRW_BUFFER_TYPE_SCANOUT;
+ }
+ else {
+ buffer_type = BRW_BUFFER_TYPE_TEXTURE;
+ }
+
+ ret = bscreen->sws->bo_alloc( bscreen->sws,
+ buffer_type,
+ tex->pitch * tex->total_height * tex->cpp,
+ 64,
+ &tex->bo );
+ if (ret)
+ goto fail;
+
+ tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+ tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+ assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+ /* XXX: what happens when tex->bo->offset changes???
+ */
+ tex->ss.ss1.base_addr = 0; /* reloc */
+ tex->ss.ss2.mip_count = tex->base.last_level;
+ tex->ss.ss2.width = tex->base.width0 - 1;
+ tex->ss.ss2.height = tex->base.height0 - 1;
+
+ switch (tex->tiling) {
+ case BRW_TILING_NONE:
+ tex->ss.ss3.tiled_surface = 0;
+ tex->ss.ss3.tile_walk = 0;
+ break;
+ case BRW_TILING_X:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case BRW_TILING_Y:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+
+ tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+ tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+ tex->ss.ss4.min_lod = 0;
+
+ if (tex->base.target == PIPE_TEXTURE_CUBE) {
+ tex->ss.ss0.cube_pos_x = 1;
+ tex->ss.ss0.cube_pos_y = 1;
+ tex->ss.ss0.cube_pos_z = 1;
+ tex->ss.ss0.cube_neg_x = 1;
+ tex->ss.ss0.cube_neg_y = 1;
+ tex->ss.ss0.cube_neg_z = 1;
+ }
+
+ return &tex->base;
+
+fail:
+ bo_reference(&tex->bo, NULL);
+ FREE(tex);
+ return NULL;
+}
+
+static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
+ const struct pipe_texture *templ,
+ const unsigned *stride,
+ struct pipe_buffer *buffer)
+{
+ return NULL;
+}
+
+static void brw_texture_destroy(struct pipe_texture *pt)
+{
+ struct brw_texture *tex = brw_texture(pt);
+ bo_reference(&tex->bo, NULL);
+ FREE(pt);
+}
+
+
+static boolean brw_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+ return translate_tex_format(format) != BRW_SURFACEFORMAT_INVALID;
+}
+
+
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+ struct pipe_texture *texture,
+ unsigned face,
+ unsigned level,
+ struct brw_winsys_buffer *bo )
+{
+ struct brw_texture *tex = brw_texture(texture);
+ struct brw_surface *surf;
+ int i;
+
+ /* XXX: this is subject to false positives if the underlying
+ * texture BO is referenced, we can't tell whether the sub-region
+ * we care about participates in that.
+ */
+ if (brw_screen->sws->bo_references( bo, tex->bo ))
+ return TRUE;
+
+ /* Find any view on this texture for this face/level and see if it
+ * is referenced:
+ */
+ for (i = 0; i < 2; i++) {
+ foreach (surf, &tex->views[i]) {
+ if (surf->bo == tex->bo)
+ continue;
+
+ if (surf->id.bits.face != face ||
+ surf->id.bits.level != level)
+ continue;
+
+ if (brw_screen->sws->bo_references( bo, surf->bo))
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * Transfer functions
+ */
+
+static struct pipe_transfer*
+brw_get_tex_transfer(struct pipe_screen *screen,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level, unsigned zslice,
+ enum pipe_transfer_usage usage, unsigned x, unsigned y,
+ unsigned w, unsigned h)
+{
+ struct brw_texture *tex = brw_texture(texture);
+ struct brw_transfer *trans;
+ unsigned offset; /* in bytes */
+
+ if (texture->target == PIPE_TEXTURE_CUBE) {
+ offset = tex->image_offset[level][face];
+ } else if (texture->target == PIPE_TEXTURE_3D) {
+ offset = tex->image_offset[level][zslice];
+ } else {
+ offset = tex->image_offset[level][0];
+ assert(face == 0);
+ assert(zslice == 0);
+ }
+
+ trans = CALLOC_STRUCT(brw_transfer);
+ if (trans) {
+ pipe_texture_reference(&trans->base.texture, texture);
+ trans->base.x = x;
+ trans->base.y = y;
+ trans->base.width = w;
+ trans->base.height = h;
+ trans->base.stride = tex->pitch * tex->cpp;
+ trans->offset = offset;
+ trans->base.usage = usage;
+ }
+ return &trans->base;
+}
+
+static void *
+brw_transfer_map(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
+{
+ struct brw_texture *tex = brw_texture(transfer->texture);
+ struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+ char *map;
+ unsigned usage = transfer->usage;
+
+ map = sws->bo_map(tex->bo,
+ BRW_DATA_OTHER,
+ 0,
+ tex->bo->size,
+ (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE,
+ (usage & 0) ? TRUE : FALSE,
+ (usage & 0) ? TRUE : FALSE);
+
+ if (!map)
+ return NULL;
+
+ /* XXX: blocksize and compressed textures
+ */
+ return map + brw_transfer(transfer)->offset +
+ transfer->y /* / transfer->block.height */ * transfer->stride +
+ transfer->x /* / transfer->block.width */ * brw_texture(transfer->texture)->cpp;
+}
+
+static void
+brw_transfer_unmap(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
+{
+ struct brw_texture *tex = brw_texture(transfer->texture);
+ struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+
+ sws->bo_unmap(tex->bo);
+}
+
+static void
+brw_tex_transfer_destroy(struct pipe_transfer *trans)
+{
+ pipe_texture_reference(&trans->texture, NULL);
+ FREE(trans);
+}
+
+
+/*
+ * Functions exported to the winsys
+ */
+
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+ struct brw_winsys_buffer **buffer,
+ unsigned *stride)
+{
+ struct brw_texture *tex = brw_texture(texture);
+
+ *buffer = tex->bo;
+ if (stride)
+ *stride = tex->pitch * tex->cpp;
+
+ return TRUE;
+}
+
+struct pipe_texture *
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+ const struct pipe_texture *templ,
+ unsigned pitch,
+ unsigned tiling,
+ struct brw_winsys_buffer *buffer)
+{
+ struct brw_screen *bscreen = brw_screen(screen);
+ struct brw_texture *tex;
+
+ if (templ->target != PIPE_TEXTURE_2D ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1)
+ return NULL;
+
+ if (util_format_is_compressed(templ->format))
+ return NULL;
+
+ tex = CALLOC_STRUCT(brw_texture);
+ if (!tex)
+ return NULL;
+
+ memcpy(&tex->base, templ, sizeof *templ);
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ /* XXX: cpp vs. blocksize
+ */
+ tex->cpp = util_format_get_blocksize(tex->base.format);
+ tex->tiling = tiling;
+
+ make_empty_list(&tex->views[0]);
+ make_empty_list(&tex->views[1]);
+
+ if (!brw_texture_layout(bscreen, tex))
+ goto fail;
+
+ /* XXX Maybe some more checks? */
+ if ((pitch / tex->cpp) < tex->pitch)
+ goto fail;
+
+ tex->pitch = pitch / tex->cpp;
+
+ tex->bo = buffer;
+
+ /* fix this warning */
+#if 0
+ if (tex->size > buffer->size)
+ goto fail;
+#endif
+
+ tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+ tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+ assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+ /* XXX: what happens when tex->bo->offset changes???
+ */
+ tex->ss.ss1.base_addr = 0; /* reloc */
+ tex->ss.ss2.mip_count = tex->base.last_level;
+ tex->ss.ss2.width = tex->base.width0 - 1;
+ tex->ss.ss2.height = tex->base.height0 - 1;
+
+ switch (tex->tiling) {
+ case BRW_TILING_NONE:
+ tex->ss.ss3.tiled_surface = 0;
+ tex->ss.ss3.tile_walk = 0;
+ break;
+ case BRW_TILING_X:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case BRW_TILING_Y:
+ tex->ss.ss3.tiled_surface = 1;
+ tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+
+ tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+ tex->ss.ss3.depth = tex->base.depth0 - 1;
+
+ tex->ss.ss4.min_lod = 0;
+
+ return &tex->base;
+
+fail:
+ FREE(tex);
+ return NULL;
+}
+
+void brw_screen_tex_init( struct brw_screen *brw_screen )
+{
+ brw_screen->base.is_format_supported = brw_is_format_supported;
+ brw_screen->base.texture_create = brw_texture_create;
+ brw_screen->base.texture_destroy = brw_texture_destroy;
+ brw_screen->base.texture_blanket = brw_texture_blanket;
+ brw_screen->base.get_tex_transfer = brw_get_tex_transfer;
+ brw_screen->base.transfer_map = brw_transfer_map;
+ brw_screen->base.transfer_unmap = brw_transfer_unmap;
+ brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 00000000000..e1986a9dbbd
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+static enum pipe_error compile_sf_prog( struct brw_context *brw,
+ struct brw_sf_prog_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ enum pipe_error ret;
+ struct brw_sf_compile c;
+ const GLuint *program;
+ GLuint program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.key = *key;
+ c.nr_attrs = c.key.nr_attrs;
+ c.nr_attr_regs = (c.nr_attrs+1)/2;
+ c.nr_setup_attrs = c.key.nr_attrs;
+ c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+ c.prog_data.urb_read_length = c.nr_attr_regs;
+ c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+ /* Special case when there are no attributes to setup.
+ *
+ * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but
+ * breaks vp-tris.c
+ */
+ if (c.nr_attrs - 1 == 0) {
+ c.nr_verts = 0;
+ brw_emit_null_setup( &c );
+ }
+ else {
+ /* Which primitive? Or all three?
+ */
+ switch (key->primitive) {
+ case SF_TRIANGLES:
+ c.nr_verts = 3;
+ brw_emit_tri_setup( &c, GL_TRUE );
+ break;
+ case SF_LINES:
+ c.nr_verts = 2;
+ brw_emit_line_setup( &c, GL_TRUE );
+ break;
+ case SF_POINTS:
+ c.nr_verts = 1;
+ if (key->do_point_sprite)
+ brw_emit_point_sprite_setup( &c, GL_TRUE );
+ else
+ brw_emit_point_setup( &c, GL_TRUE );
+ break;
+ case SF_UNFILLED_TRIS:
+ c.nr_verts = 3;
+ brw_emit_anyprim_setup( &c );
+ break;
+ default:
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+ }
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ /* Upload
+ */
+ ret = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->sf.prog_data,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static enum pipe_error upload_sf_prog(struct brw_context *brw)
+{
+ const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+ struct brw_sf_prog_key key;
+ enum pipe_error ret;
+ unsigned i;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key, noting state dependencies:
+ */
+
+ /* XXX: Add one to account for the position input.
+ */
+ /* PIPE_NEW_FRAGMENT_SIGNATURE */
+ key.nr_attrs = sig->nr_inputs + 1;
+
+
+ /* XXX: why is position required to be linear? why do we care
+ * about it at all?
+ */
+ key.linear_attrs = 1; /* position -- but why? */
+
+ for (i = 0; i < sig->nr_inputs; i++) {
+ switch (sig->input[i].interp) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ key.linear_attrs |= 1 << (i+1);
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ key.persp_attrs |= 1 << (i+1);
+ break;
+ }
+ }
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ switch (brw->reduced_primitive) {
+ case PIPE_PRIM_TRIANGLES:
+ /* PIPE_NEW_RAST
+ */
+ if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL ||
+ brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL)
+ key.primitive = SF_UNFILLED_TRIS;
+ else
+ key.primitive = SF_TRIANGLES;
+ break;
+ case PIPE_PRIM_LINES:
+ key.primitive = SF_LINES;
+ break;
+ case PIPE_PRIM_POINTS:
+ key.primitive = SF_POINTS;
+ break;
+ }
+
+ key.do_point_sprite = brw->curr.rast->templ.point_sprite;
+ key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */
+ key.do_flat_shading = brw->curr.rast->templ.flatshade;
+ key.do_twoside_color = brw->curr.rast->templ.light_twoside;
+
+ if (key.do_twoside_color) {
+ key.frontface_ccw = (brw->curr.rast->templ.front_winding ==
+ PIPE_WINDING_CCW);
+ }
+
+ if (brw_search_cache(&brw->cache, BRW_SF_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->sf.prog_data,
+ &brw->sf.prog_bo))
+ return PIPE_OK;
+
+ ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = 0
+ },
+ .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 00000000000..a895c7d2f6a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,122 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+struct brw_sf_prog_key {
+
+ /* Bitmask of linear and perspective interpolated inputs, 0..nr
+ */
+ GLuint persp_attrs:32;
+ GLuint linear_attrs:32;
+ GLuint point_coord_replace_attrs:32;
+
+ GLuint nr_attrs:8;
+ GLuint primitive:2;
+ GLuint do_twoside_color:1;
+ GLuint do_flat_shading:1;
+ GLuint frontface_ccw:1;
+ GLuint do_point_sprite:1;
+ GLuint sprite_origin_lower_left:1;
+ GLuint pad:17;
+
+ GLuint attr_col0:8;
+ GLuint attr_col1:8;
+ GLuint attr_bfc0:8;
+ GLuint attr_bfc1:8;
+};
+
+struct brw_sf_point_tex {
+ GLboolean CoordReplace;
+};
+
+struct brw_sf_compile {
+ struct brw_compile func;
+ struct brw_sf_prog_key key;
+ struct brw_sf_prog_data prog_data;
+
+ struct brw_reg pv;
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in seperately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ struct brw_reg m1Cx;
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ GLuint nr_verts;
+ GLuint nr_attrs;
+ GLuint nr_attr_regs;
+ GLuint nr_setup_attrs;
+ GLuint nr_setup_regs;
+
+ GLuint point_coord_replace_mask;
+};
+
+
+void brw_emit_null_setup( struct brw_sf_compile *c );
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 00000000000..3b85725e368
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,765 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+ struct brw_reg vert,
+ GLuint attr)
+{
+ GLuint off = attr / 2;
+ GLuint sub = attr % 2;
+
+ return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+
+/***********************************************************************
+ * Twoside lighting
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+ struct brw_reg vert )
+{
+ struct brw_compile *p = &c->func;
+
+ if (c->key.attr_col0 && c->key.attr_bfc0)
+ brw_MOV(p,
+ get_vert_attr(c, vert, c->key.attr_col0),
+ get_vert_attr(c, vert, c->key.attr_bfc0));
+
+ if (c->key.attr_col1 && c->key.attr_bfc1)
+ brw_MOV(p,
+ get_vert_attr(c, vert, c->key.attr_col1),
+ get_vert_attr(c, vert, c->key.attr_bfc1));
+}
+
+
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* XXX: What happens if BFC isn't present? This could only happen
+ * for user-supplied vertex programs, as t_vp_build.c always does
+ * the right thing.
+ */
+ if (!(c->key.attr_col0 && c->key.attr_bfc0) &&
+ !(c->key.attr_col1 && c->key.attr_bfc1))
+ return;
+
+ /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_push_insn_state(p);
+ brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_4);
+ {
+ switch (c->nr_verts) {
+ case 3: copy_bfc(c, c->vert[2]);
+ case 2: copy_bfc(c, c->vert[1]);
+ case 1: copy_bfc(c, c->vert[0]);
+ }
+ }
+ brw_ENDIF(p, if_insn);
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+ (1<<VERT_RESULT_COL1))
+
+static void copy_colors( struct brw_sf_compile *c,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct brw_compile *p = &c->func;
+
+ if (c->key.attr_col0)
+ brw_MOV(p,
+ get_vert_attr(c, dst, c->key.attr_col0),
+ get_vert_attr(c, src, c->key.attr_col0));
+
+ if (c->key.attr_col1)
+ brw_MOV(p,
+ get_vert_attr(c, dst, c->key.attr_col1),
+ get_vert_attr(c, src, c->key.attr_col1));
+
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1;
+ GLuint nr = 0;
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+
+ copy_colors(c, c->vert[1], c->vert[0]);
+ copy_colors(c, c->vert[2], c->vert[0]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
+
+ copy_colors(c, c->vert[0], c->vert[1]);
+ copy_colors(c, c->vert[2], c->vert[1]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
+
+ copy_colors(c, c->vert[0], c->vert[2]);
+ copy_colors(c, c->vert[1], c->vert[2]);
+
+ brw_pop_insn_state(p);
+}
+
+
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint jmpi = 1;
+ GLuint nr = 0;
+
+ if (c->key.attr_col0)
+ nr++;
+
+ if (c->key.attr_col1)
+ nr++;
+
+ if (nr == 0)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+ copy_colors(c, c->vert[1], c->vert[0]);
+
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+ copy_colors(c, c->vert[0], c->vert[1]);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{
+ GLuint reg, i;
+
+ /* Values computed by fixed function unit:
+ */
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+ c->det = brw_vec1_grf(1, 2);
+ c->dx0 = brw_vec1_grf(1, 3);
+ c->dx2 = brw_vec1_grf(1, 4);
+ c->dy0 = brw_vec1_grf(1, 5);
+ c->dy2 = brw_vec1_grf(1, 6);
+
+ /* z and 1/w passed in seperately:
+ */
+ c->z[0] = brw_vec1_grf(2, 0);
+ c->inv_w[0] = brw_vec1_grf(2, 1);
+ c->z[1] = brw_vec1_grf(2, 2);
+ c->inv_w[1] = brw_vec1_grf(2, 3);
+ c->z[2] = brw_vec1_grf(2, 4);
+ c->inv_w[2] = brw_vec1_grf(2, 5);
+
+ /* The vertices:
+ */
+ reg = 3;
+ for (i = 0; i < c->nr_verts; i++) {
+ c->vert[i] = brw_vec8_grf(reg, 0);
+ reg += c->nr_attr_regs;
+ }
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ c->inv_det = brw_vec1_grf(reg, 0); reg++;
+ c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->tmp = brw_vec8_grf(reg, 0); reg++;
+
+ /* Note grf allocation:
+ */
+ c->prog_data.total_grf = reg;
+
+
+ /* Outputs of this program - interpolation coefficients for
+ * rasterization:
+ */
+ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+ c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+ c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ brw_push_insn_state(p);
+
+ /* Copy both scalars with a single MOV:
+ */
+ for (i = 0; i < c->nr_verts; i++)
+ brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+
+ brw_pop_insn_state(p);
+}
+
+
+static void invert_det( struct brw_sf_compile *c)
+{
+ /* Looks like we invert all 8 elements just to get 1/det in
+ * position 2 !?!
+ */
+ brw_math(&c->func,
+ c->inv_det,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->det,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+}
+
+
+/* Two attributes packed into a wide register. Figure out if either
+ * or both of them need linear/perspective interpolation. Constant
+ * regs are left as-is.
+ */
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+ GLuint reg,
+ GLushort *pc,
+ GLushort *pc_persp,
+ GLushort *pc_linear)
+{
+ GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+ GLuint persp_mask = c->key.persp_attrs;
+ GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs);
+
+ *pc_persp = 0;
+ *pc_linear = 0;
+ *pc = 0xf;
+
+ if (persp_mask & (1 << (reg*2)))
+ *pc_persp = 0xf;
+
+ if (linear_mask & (1 << (reg*2)))
+ *pc_linear = 0xf;
+
+ /* Maybe only processs one attribute on the final round:
+ */
+ if (reg*2+1 < c->nr_setup_attrs) {
+ *pc |= 0xf0;
+
+ if (persp_mask & (1 << (reg*2+1)))
+ *pc_persp |= 0xf0;
+
+ if (linear_mask & (1 << (reg*2+1)))
+ *pc_linear |= 0xf0;
+ }
+
+ return is_last_attr;
+}
+
+
+void brw_emit_null_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+
+ /* m0 is implicitly copied from r0 in the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+}
+
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 3;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_twoside_color)
+ do_twoside_color(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_triangle(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+}
+
+
+
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+
+ c->nr_verts = 2;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_line(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* XXX: only seems to check point_coord_replace_attrs for every
+ * second attribute?!?
+ */
+ boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i)));
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ if (coord_replace) {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+ }
+
+ if (coord_replace) {
+ /* Caculate 1.0/PointWidth */
+ brw_math(&c->func,
+ c->tmp,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->dx0,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->key.sprite_origin_lower_left) {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ else {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ }
+ else {
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ if (coord_replace) {
+ if (c->key.sprite_origin_lower_left) {
+ brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+ brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+ }
+ else {
+ brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ }
+ }
+ else {
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+ }
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+ struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
+ struct brw_reg primmask;
+ struct brw_instruction *jmp;
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+ GLuint saveflag;
+
+ c->nr_verts = 3;
+ alloc_regs(c);
+
+ primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, primmask, brw_imm_ud(1));
+ brw_SHL(p, primmask, primmask, payload_prim);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+ (1<<_3DPRIM_TRISTRIP) |
+ (1<<_3DPRIM_TRIFAN) |
+ (1<<_3DPRIM_TRISTRIP_REVERSE) |
+ (1<<_3DPRIM_POLYGON) |
+ (1<<_3DPRIM_RECTLIST) |
+ (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_tri_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine, so must
+ * restore the flag which is changed when building
+ * the subroutine. fix #13240
+ */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+ (1<<_3DPRIM_LINESTRIP) |
+ (1<<_3DPRIM_LINELOOP) |
+ (1<<_3DPRIM_LINESTRIP_CONT) |
+ (1<<_3DPRIM_LINESTRIP_BF) |
+ (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_line_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_point_sprite_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
new file mode 100644
index 00000000000..25dc2b52e07
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -0,0 +1,333 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+static enum pipe_error upload_sf_vp(struct brw_context *brw)
+{
+ const struct pipe_viewport_state *vp = &brw->curr.viewport;
+ const struct pipe_scissor_state *scissor = &brw->curr.scissor;
+ struct brw_sf_viewport sfv;
+ enum pipe_error ret;
+
+ memset(&sfv, 0, sizeof(sfv));
+
+ /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */
+
+ sfv.viewport.m00 = vp->scale[0];
+ sfv.viewport.m11 = vp->scale[1];
+ sfv.viewport.m22 = vp->scale[2];
+ sfv.viewport.m30 = vp->translate[0];
+ sfv.viewport.m31 = vp->translate[1];
+ sfv.viewport.m32 = vp->translate[2];
+
+ sfv.scissor.xmin = scissor->minx;
+ sfv.scissor.xmax = scissor->maxx - 1; /* ? */
+ sfv.scissor.ymin = scissor->miny;
+ sfv.scissor.ymax = scissor->maxy - 1; /* ? */
+
+ ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0,
+ &brw->sf.vp_bo );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_sf_vp = {
+ .dirty = {
+ .mesa = (PIPE_NEW_VIEWPORT |
+ PIPE_NEW_SCISSOR),
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = upload_sf_vp
+};
+
+struct brw_sf_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int nr_urb_entries, urb_size, sfsize;
+
+ unsigned scissor:1;
+ unsigned line_smooth:1;
+ unsigned point_sprite:1;
+ unsigned point_attenuated:1;
+ unsigned front_face:2;
+ unsigned cull_mode:2;
+ unsigned flatshade_first:1;
+ unsigned gl_rasterization_rules:1;
+ unsigned line_last_pixel_enable:1;
+ float line_width;
+ float point_size;
+};
+
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+ const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_SF_PROG */
+ key->total_grf = brw->sf.prog_data->total_grf;
+ key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_sf_entries;
+ key->urb_size = brw->urb.vsize;
+ key->sfsize = brw->urb.sfsize;
+
+ /* PIPE_NEW_RAST */
+ key->scissor = rast->scissor;
+ key->front_face = rast->front_winding;
+ key->cull_mode = rast->cull_mode;
+ key->line_smooth = rast->line_smooth;
+ key->line_width = rast->line_width;
+ key->flatshade_first = rast->flatshade_first;
+ key->line_last_pixel_enable = rast->line_last_pixel;
+ key->gl_rasterization_rules = rast->gl_rasterization_rules;
+
+ key->point_sprite = rast->point_sprite;
+ key->point_attenuated = rast->point_size_per_vertex;
+
+ key->point_size = CLAMP(rast->point_size,
+ rast->point_size_min,
+ rast->point_size_max);
+}
+
+static enum pipe_error
+sf_unit_create_from_key(struct brw_context *brw,
+ struct brw_sf_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_sf_unit_state sf;
+ enum pipe_error ret;
+ int chipset_max_threads;
+ memset(&sf, 0, sizeof(sf));
+
+ sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ /* reloc */
+ sf.thread0.kernel_start_pointer = 0;
+
+ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ sf.thread3.dispatch_grf_start_reg = 3;
+
+ if (BRW_IS_IGDNG(brw))
+ sf.thread3.urb_entry_read_offset = 3;
+ else
+ sf.thread3.urb_entry_read_offset = 1;
+
+ sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+ sf.thread4.nr_urb_entries = key->nr_urb_entries;
+ sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+ /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+ * 48(IGDNG) threads
+ */
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 48;
+ else
+ chipset_max_threads = 24;
+
+ sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ sf.thread4.max_threads = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ sf.thread4.stats_enable = 1;
+
+ /* CACHE_NEW_SF_VP */
+ /* reloc */
+ sf.sf5.sf_viewport_state_offset = 0;
+
+ sf.sf5.viewport_transform = 1;
+
+ if (key->scissor)
+ sf.sf6.scissor = 1;
+
+ if (key->front_face == PIPE_WINDING_CCW)
+ sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+ switch (key->cull_mode) {
+ case PIPE_WINDING_CCW:
+ case PIPE_WINDING_CW:
+ sf.sf6.cull_mode = (key->front_face == key->cull_mode ?
+ BRW_CULLMODE_FRONT :
+ BRW_CULLMODE_BACK);
+ break;
+ case PIPE_WINDING_BOTH:
+ sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+ break;
+ case PIPE_WINDING_NONE:
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+ break;
+ default:
+ assert(0);
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+ break;
+ }
+
+ /* _NEW_LINE */
+ /* XXX use ctx->Const.Min/MaxLineWidth here */
+ sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+ sf.sf6.line_endcap_aa_region_width = 1;
+ if (key->line_smooth)
+ sf.sf6.aa_enable = 1;
+ else if (sf.sf6.line_width <= 0x2)
+ sf.sf6.line_width = 0;
+
+ /* XXX: gl_rasterization_rules? something else?
+ */
+ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+ sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+ sf.sf6.point_rast_rule = 1;
+
+ /* XXX clamp max depends on AA vs. non-AA */
+
+ /* _NEW_POINT */
+ sf.sf7.sprite_point = key->point_sprite;
+ sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+ sf.sf7.use_point_size_state = !key->point_attenuated;
+ sf.sf7.aa_line_distance_mode = 0;
+
+ /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+ */
+ if (!key->flatshade_first) {
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ } else {
+ sf.sf7.trifan_pv = 1;
+ sf.sf7.linestrip_pv = 0;
+ sf.sf7.tristrip_pv = 0;
+ }
+
+ sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;
+
+ /* Set bias for OpenGL rasterization rules:
+ */
+ if (key->gl_rasterization_rules) {
+ sf.sf6.dest_org_vbias = 0x8;
+ sf.sf6.dest_org_hbias = 0x8;
+ }
+ else {
+ sf.sf6.dest_org_vbias = 0x0;
+ sf.sf6.dest_org_hbias = 0x0;
+ }
+
+ ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+ key, sizeof(*key),
+ reloc, 2,
+ &sf, sizeof(sf),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+
+ return PIPE_OK;
+}
+
+static enum pipe_error upload_sf_unit( struct brw_context *brw )
+{
+ struct brw_sf_unit_key key;
+ struct brw_winsys_reloc reloc[2];
+ unsigned total_grf;
+ unsigned viewport_transform;
+ unsigned front_winding;
+ enum pipe_error ret;
+
+ sf_unit_populate_key(brw, &key);
+
+ /* XXX: cut this crap and pre calculate the key:
+ */
+ total_grf = (align(key.total_grf, 16) / 16 - 1);
+ viewport_transform = 1;
+ front_winding = (key.front_face == PIPE_WINDING_CCW ?
+ BRW_FRONTWINDING_CCW :
+ BRW_FRONTWINDING_CW);
+
+ /* Emit SF program relocation */
+ make_reloc(&reloc[0],
+ BRW_USAGE_STATE,
+ total_grf << 1,
+ offsetof(struct brw_sf_unit_state, thread0),
+ brw->sf.prog_bo);
+
+ /* Emit SF viewport relocation */
+ make_reloc(&reloc[1],
+ BRW_USAGE_STATE,
+ front_winding | (viewport_transform << 1),
+ offsetof(struct brw_sf_unit_state, sf5),
+ brw->sf.vp_bo);
+
+
+ if (brw_search_cache(&brw->cache, BRW_SF_UNIT,
+ &key, sizeof(key),
+ reloc, 2,
+ NULL,
+ &brw->sf.state_bo))
+ return PIPE_OK;
+
+
+ ret = sf_unit_create_from_key(brw, &key,
+ reloc,
+ &brw->sf.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_sf_unit = {
+ .dirty = {
+ .mesa = (PIPE_NEW_RAST),
+ .brw = BRW_NEW_URB_FENCE,
+ .cache = (CACHE_NEW_SF_VP |
+ CACHE_NEW_SF_PROG)
+ },
+ .prepare = upload_sf_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 00000000000..d2bbd0123d1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+
+static INLINE void
+brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
+{
+ assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
+
+ if (bo != NULL) {
+ bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++],
+ bo );
+ }
+}
+
+const struct brw_tracked_state brw_blend_constant_color;
+const struct brw_tracked_state brw_cc_unit;
+const struct brw_tracked_state brw_cc_vp;
+const struct brw_tracked_state brw_clip_prog;
+const struct brw_tracked_state brw_clip_unit;
+const struct brw_tracked_state brw_curbe_buffer;
+const struct brw_tracked_state brw_curbe_offsets;
+const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_gs_prog;
+const struct brw_tracked_state brw_gs_unit;
+const struct brw_tracked_state brw_line_stipple;
+const struct brw_tracked_state brw_aa_line_parameters;
+const struct brw_tracked_state brw_pipelined_state_pointers;
+const struct brw_tracked_state brw_binding_table_pointers;
+const struct brw_tracked_state brw_depthbuffer;
+const struct brw_tracked_state brw_polygon_stipple;
+const struct brw_tracked_state brw_program_parameters;
+const struct brw_tracked_state brw_recalculate_urb_fence;
+const struct brw_tracked_state brw_sf_prog;
+const struct brw_tracked_state brw_sf_unit;
+const struct brw_tracked_state brw_sf_vp;
+const struct brw_tracked_state brw_state_base_address;
+const struct brw_tracked_state brw_urb_fence;
+const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
+const struct brw_tracked_state brw_vs_prog;
+const struct brw_tracked_state brw_vs_unit;
+const struct brw_tracked_state brw_wm_input_sizes;
+const struct brw_tracked_state brw_wm_prog;
+const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
+const struct brw_tracked_state brw_wm_surfaces;
+const struct brw_tracked_state brw_wm_unit;
+
+const struct brw_tracked_state brw_psp_urb_cbs;
+
+const struct brw_tracked_state brw_pipe_control;
+
+const struct brw_tracked_state brw_drawing_rect;
+const struct brw_tracked_state brw_indices;
+const struct brw_tracked_state brw_vertices;
+const struct brw_tracked_state brw_index_buffer;
+
+
+/***********************************************************************
+ * brw_state.c
+ */
+int brw_validate_state(struct brw_context *brw);
+int brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+enum pipe_error brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out );
+
+enum pipe_error brw_cache_data_sz(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out);
+
+enum pipe_error brw_upload_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ void *aux_return ,
+ struct brw_winsys_buffer **bo_out);
+
+boolean brw_search_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ void *aux_return,
+ struct brw_winsys_buffer **bo_out);
+
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/***********************************************************************
+ * brw_wm_surface_state.c
+ */
+
+/***********************************************************************
+ * brw_state_debug.c
+ */
+void brw_update_dirty_counts( unsigned mesa,
+ unsigned brw,
+ unsigned cache );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 00000000000..7d212e5c247
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+
+
+/* A facility similar to the data caching code above, which aims to
+ * prevent identical commands being issued repeatedly.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ GLuint sz )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+ struct header *newheader = (struct header *)data;
+
+ if (brw->flags.always_emit_state) {
+ brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+ return GL_TRUE;
+ }
+
+ while (item) {
+ if (item->header->opcode == newheader->opcode) {
+ if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+ return GL_FALSE;
+ if (item->sz != sz) {
+ FREE(item->header);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ }
+ goto emit;
+ }
+ item = item->next;
+ }
+
+ assert(!item);
+ item = CALLOC_STRUCT(brw_cached_batch_item);
+ item->header = MALLOC(sz);
+ item->sz = sz;
+ item->next = brw->cached_batch_items;
+ brw->cached_batch_items = item;
+
+ emit:
+ memcpy(item->header, newheader, sz);
+ brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
+ return GL_TRUE;
+}
+
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+ while (item) {
+ struct brw_cached_batch_item *next = item->next;
+ free((void *)item->header);
+ free(item);
+ item = next;
+ }
+
+ brw->cached_batch_items = NULL;
+}
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+ brw_clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 00000000000..16b643ceb28
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,617 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965. The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object of state (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table based on a CRC of the key data.
+ * The cache_id and relocation target buffers associated with the state
+ * buffer are included as auxiliary key data, but are not part of the hash
+ * value (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented. Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc structs need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match. The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+#include "util/u_memory.h"
+
+#include "brw_debug.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+ struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+ GLuint *ikey = (GLuint *)key;
+ GLuint hash = 0, i;
+
+ assert(key_size % 4 == 0);
+
+ /* I'm sure this can be improved on:
+ */
+ for (i = 0; i < key_size/4; i++) {
+ hash ^= ikey[i];
+ hash = (hash << 5) | (hash >> 27);
+ }
+
+ /* Include the BO pointers as key data as well */
+ ikey = (GLuint *)relocs;
+ key_size = nr_relocs * sizeof(struct brw_winsys_reloc);
+ for (i = 0; i < key_size/4; i++) {
+ hash ^= ikey[i];
+ hash = (hash << 5) | (hash >> 27);
+ }
+
+ return hash;
+}
+
+
+/**
+ * Marks a new buffer as being chosen for the given cache id.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+ struct brw_winsys_buffer *bo)
+{
+ if (bo == cache->last_bo[cache_id])
+ return; /* no change */
+
+ bo_reference( &cache->last_bo[cache_id], bo );
+
+ cache->brw->state.dirty.cache |= 1 << cache_id;
+}
+
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+ GLuint hash, const void *key, GLuint key_size,
+ struct brw_winsys_reloc *relocs, GLuint nr_relocs)
+{
+ struct brw_cache_item *c;
+
+#if 0
+ int bucketcount = 0;
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next)
+ bucketcount++;
+
+ debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size,
+ cache->size, bucketcount, cache->n_items);
+#endif
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next) {
+ if (c->cache_id == cache_id &&
+ c->hash == hash &&
+ c->key_size == key_size &&
+ memcmp(c->key, key, key_size) == 0 &&
+ c->nr_relocs == nr_relocs &&
+ memcmp(c->relocs, relocs, nr_relocs * sizeof *relocs) == 0)
+ return c;
+ }
+
+ return NULL;
+}
+
+
+static void
+rehash(struct brw_cache *cache)
+{
+ struct brw_cache_item **items;
+ struct brw_cache_item *c, *next;
+ GLuint size, i;
+
+ size = cache->size * 3;
+ items = (struct brw_cache_item**) CALLOC(size, sizeof(*items));
+
+ for (i = 0; i < cache->size; i++)
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ c->next = items[c->hash % size];
+ items[c->hash % size] = c;
+ }
+
+ FREE(cache->items);
+ cache->items = items;
+ cache->size = size;
+}
+
+
+/**
+ * Returns the buffer object matching cache_id and key, or NULL.
+ */
+boolean
+brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ void *aux_return,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_cache_item *item;
+ GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+
+ item = search_cache(cache, cache_id, hash, key, key_size,
+ relocs, nr_relocs);
+
+ if (item) {
+ if (aux_return)
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+ update_cache_last(cache, cache_id, item->bo);
+ bo_reference(bo_out, item->bo);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+enum pipe_error
+brw_upload_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ const void *data,
+ GLuint data_size,
+ const void *aux,
+ void *aux_return,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+ GLuint relocs_size = nr_relocs * sizeof relocs[0];
+ GLuint aux_size = cache->aux_size[cache_id];
+ enum pipe_error ret;
+ void *tmp;
+ int i;
+
+ /* Create the buffer object to contain the data. For now, use a
+ * single buffer type to describe all cached state atoms. Later,
+ * may want to take advantage of hardware distinctions between
+ * these various entities.
+ */
+ ret = cache->sws->bo_alloc(cache->sws,
+ cache->buffer_type,
+ data_size, 1 << 6,
+ bo_out);
+ if (ret)
+ return ret;
+
+
+ /* Set up the memory containing the key, aux_data, and relocs */
+ tmp = MALLOC(key_size + aux_size + relocs_size);
+
+ memcpy(tmp, key, key_size);
+ memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]);
+ memcpy((char *)tmp + key_size + aux_size, relocs, relocs_size);
+ for (i = 0; i < nr_relocs; i++) {
+ p_atomic_inc(&relocs[i].bo->reference.count);
+ }
+
+ item->cache_id = cache_id;
+ item->key = tmp;
+ item->hash = hash;
+ item->key_size = key_size;
+ item->relocs = (struct brw_winsys_reloc *)((char *)tmp + key_size + aux_size);
+ item->nr_relocs = nr_relocs;
+ bo_reference( &item->bo, *bo_out );
+ item->data_size = data_size;
+
+ if (cache->n_items > cache->size * 1.5)
+ rehash(cache);
+
+ hash %= cache->size;
+ item->next = cache->items[hash];
+ cache->items[hash] = item;
+ cache->n_items++;
+
+ if (aux_return) {
+ assert(cache->aux_size[cache_id]);
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+ }
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("upload %s: %d bytes to cache id %d\n",
+ cache->name[cache_id],
+ data_size, cache_id);
+
+ /* Copy data to the buffer */
+ ret = cache->sws->bo_subdata(item->bo,
+ cache_id,
+ 0, data_size, data,
+ relocs, nr_relocs);
+ if (ret)
+ return ret;
+
+ update_cache_last(cache, cache_id, item->bo);
+
+ return PIPE_OK;
+}
+
+
+/**
+ * This doesn't really work with aux data. Use search/upload instead
+ */
+enum pipe_error
+brw_cache_data_sz(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_cache_item *item;
+ GLuint hash = hash_key(data, data_size, relocs, nr_relocs);
+
+ item = search_cache(cache, cache_id, hash, data, data_size,
+ relocs, nr_relocs);
+ if (item) {
+ update_cache_last(cache, cache_id, item->bo);
+
+ bo_reference(bo_out, item->bo);
+ return PIPE_OK;
+ }
+
+ return brw_upload_cache(cache, cache_id,
+ data, data_size,
+ relocs, nr_relocs,
+ data, data_size,
+ NULL, NULL,
+ bo_out);
+}
+
+
+/**
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ *
+ * XXX: above is no longer true -- can we remove some code?
+ */
+enum pipe_error
+brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ struct brw_winsys_reloc *relocs,
+ GLuint nr_relocs,
+ struct brw_winsys_buffer **bo_out)
+{
+ return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+ relocs, nr_relocs, bo_out);
+}
+
+
+static void
+brw_init_cache_id(struct brw_cache *cache,
+ const char *name,
+ enum brw_cache_id id,
+ GLuint key_size,
+ GLuint aux_size)
+{
+ cache->name[id] = strdup(name);
+ cache->key_size[id] = key_size;
+ cache->aux_size[id] = aux_size;
+}
+
+
+static void
+brw_init_general_state_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->cache;
+
+ cache->brw = brw;
+ cache->sws = brw->sws;
+
+ cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ CALLOC(cache->size, sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
+ "CC_VP",
+ BRW_CC_VP,
+ sizeof(struct brw_cc_viewport),
+ 0);
+
+ brw_init_cache_id(cache,
+ "CC_UNIT",
+ BRW_CC_UNIT,
+ sizeof(struct brw_cc_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "WM_PROG",
+ BRW_WM_PROG,
+ sizeof(struct brw_wm_prog_key),
+ sizeof(struct brw_wm_prog_data));
+
+ brw_init_cache_id(cache,
+ "SAMPLER_DEFAULT_COLOR",
+ BRW_SAMPLER_DEFAULT_COLOR,
+ sizeof(struct brw_sampler_default_color),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SAMPLER",
+ BRW_SAMPLER,
+ 0, /* variable key/data size */
+ 0);
+
+ brw_init_cache_id(cache,
+ "WM_UNIT",
+ BRW_WM_UNIT,
+ sizeof(struct brw_wm_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SF_PROG",
+ BRW_SF_PROG,
+ sizeof(struct brw_sf_prog_key),
+ sizeof(struct brw_sf_prog_data));
+
+ brw_init_cache_id(cache,
+ "SF_VP",
+ BRW_SF_VP,
+ sizeof(struct brw_sf_viewport),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SF_UNIT",
+ BRW_SF_UNIT,
+ sizeof(struct brw_sf_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "VS_UNIT",
+ BRW_VS_UNIT,
+ sizeof(struct brw_vs_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "VS_PROG",
+ BRW_VS_PROG,
+ sizeof(struct brw_vs_prog_key),
+ sizeof(struct brw_vs_prog_data));
+
+ brw_init_cache_id(cache,
+ "CLIP_UNIT",
+ BRW_CLIP_UNIT,
+ sizeof(struct brw_clip_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "CLIP_PROG",
+ BRW_CLIP_PROG,
+ sizeof(struct brw_clip_prog_key),
+ sizeof(struct brw_clip_prog_data));
+
+ brw_init_cache_id(cache,
+ "GS_UNIT",
+ BRW_GS_UNIT,
+ sizeof(struct brw_gs_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "GS_PROG",
+ BRW_GS_PROG,
+ sizeof(struct brw_gs_prog_key),
+ sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_state_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->surface_cache;
+
+ cache->brw = brw;
+ cache->sws = brw->sws;
+
+ cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ CALLOC(cache->size, sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
+ "SS_SURFACE",
+ BRW_SS_SURFACE,
+ sizeof(struct brw_surface_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SS_SURF_BIND",
+ BRW_SS_SURF_BIND,
+ 0,
+ 0);
+}
+
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+ brw_init_general_state_cache(brw);
+ brw_init_surface_state_cache(brw);
+}
+
+
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+ struct brw_cache_item *c, *next;
+ GLuint i;
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
+ int j;
+
+ next = c->next;
+
+ for (j = 0; j < c->nr_relocs; j++)
+ bo_reference(&c->relocs[j].bo, NULL);
+
+ bo_reference(&c->bo, NULL);
+ FREE((void *)c->key);
+ FREE(c);
+ }
+ cache->items[i] = NULL;
+ }
+
+ cache->n_items = 0;
+
+ if (brw->curbe.last_buf) {
+ FREE(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ }
+
+ brw->state.dirty.mesa |= ~0;
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+/* Clear all entries from the cache that point to the given bo.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
+{
+ struct brw_cache_item **prev;
+ GLuint i;
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < cache->size; i++) {
+ for (prev = &cache->items[i]; *prev;) {
+ struct brw_cache_item *c = *prev;
+
+ if (cache->sws->bo_references(c->bo, bo)) {
+ int j;
+
+ *prev = c->next;
+
+ for (j = 0; j < c->nr_relocs; j++)
+ bo_reference(&c->relocs[j].bo, NULL);
+
+ bo_reference(&c->bo, NULL);
+
+ FREE((void *)c->key);
+ FREE(c);
+ cache->n_items--;
+ } else {
+ prev = &c->next;
+ }
+ }
+ }
+}
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
+ /* un-tuned guess. We've got around 20 state objects for a total of around
+ * 32k, so 1000 of them is around 1.5MB.
+ */
+ if (brw->cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->cache);
+
+ if (brw->surface_cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->surface_cache);
+}
+
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+ GLuint i;
+
+ if (BRW_DEBUG & DEBUG_STATE)
+ debug_printf("%s\n", __FUNCTION__);
+
+ brw_clear_cache(brw, cache);
+ for (i = 0; i < BRW_MAX_CACHE; i++) {
+ bo_reference(&cache->last_bo[i], NULL);
+ FREE(cache->name[i]);
+ }
+ FREE(cache->items);
+ cache->items = NULL;
+ cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+ brw_destroy_cache(brw, &brw->cache);
+ brw_destroy_cache(brw, &brw->surface_cache);
+}
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
new file mode 100644
index 00000000000..049c278c93e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -0,0 +1,153 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+
+struct dirty_bit_map {
+ uint32_t bit;
+ char *name;
+ uint32_t count;
+};
+
+#define DEFINE_BIT(name) {name, #name, 0}
+
+static struct dirty_bit_map mesa_bits[] = {
+ DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA),
+ DEFINE_BIT(PIPE_NEW_RAST),
+ DEFINE_BIT(PIPE_NEW_BLEND),
+ DEFINE_BIT(PIPE_NEW_VIEWPORT),
+ DEFINE_BIT(PIPE_NEW_SAMPLERS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER),
+ DEFINE_BIT(PIPE_NEW_VERTEX_SHADER),
+ DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS),
+ DEFINE_BIT(PIPE_NEW_CLIP),
+ DEFINE_BIT(PIPE_NEW_INDEX_BUFFER),
+ DEFINE_BIT(PIPE_NEW_INDEX_RANGE),
+ DEFINE_BIT(PIPE_NEW_BLEND_COLOR),
+ DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE),
+ DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS),
+ DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER),
+ DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS),
+ DEFINE_BIT(PIPE_NEW_QUERY),
+ DEFINE_BIT(PIPE_NEW_SCISSOR),
+ DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES),
+ DEFINE_BIT(PIPE_NEW_NR_CBUFS),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_URB_FENCE),
+ DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+ DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+ DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_CONTEXT),
+ DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_WM_SURFACES),
+ DEFINE_BIT(BRW_NEW_xxx),
+ DEFINE_BIT(BRW_NEW_INDICES),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_CC_VP),
+ DEFINE_BIT(CACHE_NEW_CC_UNIT),
+ DEFINE_BIT(CACHE_NEW_WM_PROG),
+ DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+ DEFINE_BIT(CACHE_NEW_SAMPLER),
+ DEFINE_BIT(CACHE_NEW_WM_UNIT),
+ DEFINE_BIT(CACHE_NEW_SF_PROG),
+ DEFINE_BIT(CACHE_NEW_SF_VP),
+ DEFINE_BIT(CACHE_NEW_SF_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_PROG),
+ DEFINE_BIT(CACHE_NEW_GS_UNIT),
+ DEFINE_BIT(CACHE_NEW_GS_PROG),
+ DEFINE_BIT(CACHE_NEW_CLIP_VP),
+ DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+ DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+ DEFINE_BIT(CACHE_NEW_SURFACE),
+ DEFINE_BIT(CACHE_NEW_SURF_BIND),
+ {0, 0, 0}
+};
+
+
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ if (bit_map[i].bit & bits)
+ bit_map[i].count++;
+ }
+}
+
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ debug_printf("0x%08x: %12d (%s)\n",
+ bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+ }
+}
+
+void
+brw_update_dirty_counts( unsigned mesa,
+ unsigned brw,
+ unsigned cache )
+{
+ static int dirty_count = 0;
+
+ brw_update_dirty_count(mesa_bits, mesa);
+ brw_update_dirty_count(brw_bits, brw);
+ brw_update_dirty_count(cache_bits, cache);
+ if (dirty_count++ % 1000 == 0) {
+ brw_print_dirty_count(mesa_bits, mesa);
+ brw_print_dirty_count(brw_bits, brw);
+ brw_print_dirty_count(cache_bits, cache);
+ debug_printf("\n");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
new file mode 100644
index 00000000000..f8b91eff816
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -0,0 +1,270 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_batchbuffer.h"
+#include "brw_debug.h"
+
+const struct brw_tracked_state *atoms[] =
+{
+/* &brw_wm_input_sizes, */
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_clip_prog,
+ &brw_sf_prog,
+ &brw_wm_prog,
+
+ /* Once all the programs are done, we know how large urb entry
+ * sizes need to be and can decide if we need to change the urb
+ * layout.
+ */
+ &brw_curbe_offsets,
+ &brw_recalculate_urb_fence,
+
+ &brw_cc_vp,
+ &brw_cc_unit,
+
+ &brw_vs_surfaces, /* must do before unit */
+ /*&brw_wm_constant_surface,*/ /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
+ &brw_wm_samplers,
+
+ &brw_wm_unit,
+ &brw_sf_vp,
+ &brw_sf_unit,
+ &brw_vs_unit, /* always required, enabled or not */
+ &brw_clip_unit,
+ &brw_gs_unit,
+
+ /* Command packets:
+ */
+ &brw_invarient_state,
+ &brw_state_base_address,
+
+ &brw_binding_table_pointers,
+ &brw_blend_constant_color,
+
+ &brw_depthbuffer,
+ &brw_polygon_stipple,
+ &brw_line_stipple,
+
+ &brw_psp_urb_cbs,
+
+ &brw_drawing_rect,
+ &brw_indices,
+ &brw_index_buffer,
+ &brw_vertices,
+
+ &brw_curbe_buffer
+};
+
+
+void brw_init_state( struct brw_context *brw )
+{
+ brw_init_caches(brw);
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{
+ brw_destroy_caches(brw);
+ brw_destroy_batch_cache(brw);
+}
+
+/***********************************************************************
+ */
+
+static GLboolean check_state( const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ return ((a->mesa & b->mesa) ||
+ (a->brw & b->brw) ||
+ (a->cache & b->cache));
+}
+
+static void accumulate_state( struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ a->mesa |= b->mesa;
+ a->brw |= b->brw;
+ a->cache |= b->cache;
+}
+
+
+static void xor_states( struct brw_state_flags *result,
+ const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ result->mesa = a->mesa ^ b->mesa;
+ result->brw = a->brw ^ b->brw;
+ result->cache = a->cache ^ b->cache;
+}
+
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+ int i;
+
+ /* Clear the last round of validated bos */
+ for (i = 0; i < brw->state.validated_bo_count; i++) {
+ bo_reference(&brw->state.validated_bos[i], NULL);
+ }
+ brw->state.validated_bo_count = 0;
+}
+
+
+/***********************************************************************
+ * Emit all state:
+ */
+enum pipe_error brw_validate_state( struct brw_context *brw )
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ GLuint i;
+ int ret;
+
+ brw_clear_validated_bos(brw);
+ brw_add_validated_bo(brw, brw->batch->buf);
+
+ if (brw->flags.always_emit_state) {
+ state->mesa |= ~0;
+ state->brw |= ~0;
+ state->cache |= ~0;
+ }
+
+ if (state->mesa == 0 &&
+ state->cache == 0 &&
+ state->brw == 0)
+ return 0;
+
+ if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+ brw_clear_batch_cache(brw);
+
+ /* do prepare stage for all atoms */
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->prepare) {
+ ret = atom->prepare(brw);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ /* Make sure that the textures which are referenced by the current
+ * brw fragment program are actually present/valid.
+ * If this fails, we can experience GPU lock-ups.
+ */
+ {
+ const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+ if (fp) {
+ assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers);
+ /*assert(fp->info.texture_max <= brw->curr.num_textures);*/
+ }
+ }
+
+ return 0;
+}
+
+
+enum pipe_error brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int ret;
+ int i;
+
+ brw_clear_validated_bos(brw);
+
+ if (BRW_DEBUG) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ struct brw_state_flags examined, prev;
+ memset(&examined, 0, sizeof(examined));
+ prev = *state;
+
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+ struct brw_state_flags generated;
+
+ assert(atom->dirty.mesa ||
+ atom->dirty.brw ||
+ atom->dirty.cache);
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
+ }
+ }
+
+ accumulate_state(&examined, &atom->dirty);
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, &prev, state);
+ assert(!check_state(&examined, &generated));
+ prev = *state;
+ }
+ }
+ else {
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ ret = atom->emit( brw );
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_STATE) {
+ brw_update_dirty_counts( state->mesa,
+ state->brw,
+ state->cache );
+ }
+
+ /* Clear dirty flags:
+ */
+ memset(state, 0, sizeof(*state));
+ return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
new file mode 100644
index 00000000000..bf10bc04de7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -0,0 +1,1576 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+#include "brw_types.h"
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+/* Command packets:
+ */
+struct header
+{
+ GLuint length:16;
+ GLuint opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ GLuint dword;
+};
+
+struct brw_3d_control
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint pad:3;
+ GLuint wc_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint operation:2;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29;
+ } dest;
+
+ GLuint dword2;
+ GLuint dword3;
+};
+
+
+struct brw_3d_primitive
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint pad:2;
+ GLuint topology:5;
+ GLuint indexed:1;
+ GLuint opcode:16;
+ } header;
+
+ GLuint verts_per_instance;
+ GLuint start_vert_location;
+ GLuint instance_count;
+ GLuint start_instance_location;
+ GLuint base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct brw_mi_flush
+{
+ GLuint flags:4;
+ GLuint pad:12;
+ GLuint opcode:16;
+};
+
+struct brw_vf_statistics
+{
+ GLuint statistics_enable:1;
+ GLuint pad:15;
+ GLuint opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{
+ struct header header;
+ GLuint vs;
+ GLuint gs;
+ GLuint clp;
+ GLuint sf;
+ GLuint wm;
+};
+
+
+struct brw_blend_constant_color
+{
+ struct header header;
+ GLfloat blend_constant_color[4];
+};
+
+
+struct brw_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+};
+
+struct brw_depthbuffer_g4x
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+
+ union {
+ struct {
+ GLuint xoffset:16;
+ GLuint yoffset:16;
+ } bits;
+ GLuint dword;
+ } dword5; /* NEW in Integrated Graphics Device */
+};
+
+struct brw_drawrect
+{
+ struct header header;
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+ GLuint xorg:16;
+ GLuint yorg:16;
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{
+ struct header header;
+ GLfloat depth_offset_clamp;
+};
+
+struct brw_indexbuffer
+{
+ union {
+ struct
+ {
+ GLuint length:8;
+ GLuint index_format:2;
+ GLuint cut_index_enable:1;
+ GLuint pad:5;
+ GLuint opcode:16;
+ } bits;
+ GLuint dword;
+
+ } header;
+
+ GLuint buffer_start;
+ GLuint buffer_end;
+};
+
+/* NEW in Integrated Graphics Device */
+struct brw_aa_line_parameters
+{
+ struct header header;
+
+ struct {
+ GLuint aa_coverage_scope:8;
+ GLuint pad0:8;
+ GLuint aa_coverage_bias:8;
+ GLuint pad1:8;
+ } bits0;
+
+ struct {
+ GLuint aa_coverage_endcap_slope:8;
+ GLuint pad0:8;
+ GLuint aa_coverage_endcap_bias:8;
+ GLuint pad1:8;
+ } bits1;
+};
+
+struct brw_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pattern:16;
+ GLuint pad:16;
+ } bits0;
+
+ struct
+ {
+ GLuint repeat_count:9;
+ GLuint pad:7;
+ GLuint inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } vs;
+
+ struct
+ {
+ GLuint enable:1;
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } gs;
+
+ struct
+ {
+ GLuint enable:1;
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } clp;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } wm;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
+ } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ GLuint y_offset:5;
+ GLuint pad:3;
+ GLuint x_offset:5;
+ GLuint pad0:19;
+ } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{
+ struct header header;
+ GLuint stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{
+ struct
+ {
+ GLuint pipeline_select:1;
+ GLuint pad:15;
+ GLuint opcode:16;
+ } header;
+};
+
+
+struct brw_pipe_control
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint texture_cache_flush_enable:1;
+ GLuint indirect_state_pointers_disable:1;
+ GLuint instruction_state_cache_flush_enable:1;
+ GLuint write_cache_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint post_sync_operation:2;
+
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29;
+ } bits1;
+
+ GLuint data0;
+ GLuint data1;
+};
+
+
+struct brw_urb_fence
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint vs_realloc:1;
+ GLuint gs_realloc:1;
+ GLuint clp_realloc:1;
+ GLuint sf_realloc:1;
+ GLuint vfe_realloc:1;
+ GLuint cs_realloc:1;
+ GLuint pad:2;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint vs_fence:10;
+ GLuint gs_fence:10;
+ GLuint clp_fence:10;
+ GLuint pad:2;
+ } bits0;
+
+ struct
+ {
+ GLuint sf_fence:10;
+ GLuint vf_fence:10;
+ GLuint cs_fence:11;
+ GLuint pad:1;
+ } bits1;
+};
+
+struct brw_cs_urb_state
+{
+ struct header header;
+
+ struct
+ {
+ GLuint nr_urb_entries:3;
+ GLuint pad:1;
+ GLuint urb_entry_size:5;
+ GLuint pad0:23;
+ } bits0;
+};
+
+struct brw_constant_buffer
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint valid:1;
+ GLuint pad:7;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint buffer_length:6;
+ GLuint buffer_address:26;
+ } bits0;
+};
+
+struct brw_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct brw_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ GLuint prefetch_count:3;
+ GLuint pad:3;
+ GLuint prefetch_pointer:26;
+ } bits0;
+};
+
+struct brw_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pad:4;
+ GLuint system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ GLuint pad0:1;
+ GLuint grf_reg_count:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};
+
+struct thread1
+{
+ GLuint ext_halt_exception_enable:1;
+ GLuint sw_exception_enable:1;
+ GLuint mask_stack_exception_enable:1;
+ GLuint timeout_exception_enable:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad0:3;
+ GLuint depth_coef_urb_read_offset:6; /* WM only */
+ GLuint pad1:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad3:5;
+ GLuint single_program_flow:1;
+};
+
+struct thread2
+{
+ GLuint per_thread_scratch_space:4;
+ GLuint pad0:6;
+ GLuint scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ GLuint dispatch_grf_start_reg:4;
+ GLuint urb_entry_read_offset:6;
+ GLuint pad0:1;
+ GLuint urb_entry_read_length:6;
+ GLuint pad1:1;
+ GLuint const_urb_entry_read_offset:6;
+ GLuint pad2:1;
+ GLuint const_urb_entry_read_length:6;
+ GLuint pad3:1;
+};
+
+
+
+struct brw_clip_unit_state
+{
+ struct thread0 thread0;
+ struct
+ {
+ GLuint pad0:7;
+ GLuint sw_exception_enable:1;
+ GLuint pad1:3;
+ GLuint mask_stack_exception_enable:1;
+ GLuint pad2:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad3:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad4:5;
+ GLuint single_program_flow:1;
+ } thread1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint gs_output_stats:1; /* not always */
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5; /* may be less */
+ GLuint pad3:2;
+ } thread4;
+
+ struct
+ {
+ GLuint pad0:13;
+ GLuint clip_mode:3;
+ GLuint userclip_enable_flags:8;
+ GLuint userclip_must_clip:1;
+ GLuint negative_w_clip_test:1;
+ GLuint guard_band_enable:1;
+ GLuint viewport_z_clip_enable:1;
+ GLuint viewport_xy_clip_enable:1;
+ GLuint vertex_position_space:1;
+ GLuint api_mode:1;
+ GLuint pad2:1;
+ } clip5;
+
+ struct
+ {
+ GLuint pad0:5;
+ GLuint clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ GLfloat viewport_xmin;
+ GLfloat viewport_xmax;
+ GLfloat viewport_ymin;
+ GLfloat viewport_ymax;
+};
+
+
+
+struct brw_cc_unit_state
+{
+ struct brw_cc0
+ {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } cc0;
+
+
+ struct brw_cc1
+ {
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ GLuint stencil_ref:8;
+ } cc1;
+
+
+ struct brw_cc2
+ {
+ GLuint logicop_enable:1;
+ GLuint pad0:10;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_function:3;
+ GLuint depth_test:1;
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct brw_cc3
+ {
+ GLuint pad0:8;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test:1;
+ GLuint blend_enable:1;
+ GLuint ia_blend_enable:1;
+ GLuint pad1:1;
+ GLuint alpha_test_format:1;
+ GLuint pad2:16;
+ } cc3;
+
+ struct brw_cc4
+ {
+ GLuint pad0:5;
+ GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } cc4;
+
+ struct brw_cc5
+ {
+ GLuint pad0:2;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_src_blend_factor:5;
+ GLuint ia_blend_function:3;
+ GLuint statistics_enable:1;
+ GLuint logicop_func:4;
+ GLuint pad1:11;
+ GLuint dither_enable:1;
+ } cc5;
+
+ struct brw_cc6
+ {
+ GLuint clamp_post_alpha_blend:1;
+ GLuint clamp_pre_alpha_blend:1;
+ GLuint clamp_range:2;
+ GLuint pad0:11;
+ GLuint y_dither_offset:2;
+ GLuint x_dither_offset:2;
+ GLuint dest_blend_factor:5;
+ GLuint src_blend_factor:5;
+ GLuint blend_function:3;
+ } cc6;
+
+ struct brw_cc7 {
+ union {
+ GLfloat f;
+ GLubyte ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct brw_sf_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4;
+
+ struct
+ {
+ GLuint front_winding:1;
+ GLuint viewport_transform:1;
+ GLuint pad0:3;
+ GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf5;
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint dest_org_vbias:4;
+ GLuint dest_org_hbias:4;
+ GLuint scissor:1;
+ GLuint disable_2x2_trifilter:1;
+ GLuint disable_zero_pix_trifilter:1;
+ GLuint point_rast_rule:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint line_width:4;
+ GLuint fast_scissor_disable:1;
+ GLuint cull_mode:2;
+ GLuint aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ GLuint point_size:11;
+ GLuint use_point_size_state:1;
+ GLuint subpixel_precision:1;
+ GLuint sprite_point:1;
+ GLuint pad0:10;
+ GLuint aa_line_distance_mode:1;
+ GLuint trifan_pv:2;
+ GLuint linestrip_pv:2;
+ GLuint tristrip_pv:2;
+ GLuint line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct brw_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:8;
+ GLuint rendering_enable:1; /* for IGDNG */
+ GLuint pad4:1;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5;
+ GLuint pad3:2;
+ } thread4;
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ GLuint max_vp_index:4;
+ GLuint pad0:12;
+ GLuint svbi_post_inc_value:10;
+ GLuint pad1:1;
+ GLuint svbi_post_inc_enable:1;
+ GLuint svbi_payload:1;
+ GLuint discard_adjaceny:1;
+ GLuint reorder_enable:1;
+ GLuint pad2:1;
+ } gs6;
+};
+
+
+struct brw_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4;
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ GLuint vs_enable:1;
+ GLuint vert_cache_disable:1;
+ GLuint pad0:30;
+ } vs6;
+};
+
+
+struct brw_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ GLuint stats_enable:1;
+ GLuint depth_buffer_clear:1;
+ GLuint sampler_count:3;
+ GLuint sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ GLuint enable_8_pix:1;
+ GLuint enable_16_pix:1;
+ GLuint enable_32_pix:1;
+ GLuint enable_con_32_pix:1;
+ GLuint enable_con_64_pix:1;
+ GLuint pad0:5;
+ GLuint legacy_global_depth_bias:1;
+ GLuint line_stipple:1;
+ GLuint depth_offset:1;
+ GLuint polygon_stipple:1;
+ GLuint line_aa_region_width:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint early_depth_test:1;
+ GLuint thread_dispatch_enable:1;
+ GLuint program_uses_depth:1;
+ GLuint program_computes_depth:1;
+ GLuint program_uses_killpixel:1;
+ GLuint legacy_line_rast: 1;
+ GLuint transposed_urb_read_enable:1;
+ GLuint max_threads:7;
+ } wm5;
+
+ GLfloat global_depth_offset_constant;
+ GLfloat global_depth_offset_scale;
+
+ /* for IGDNG only */
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_1:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_2:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_3:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct brw_sampler_default_color {
+ GLfloat color[4];
+};
+
+struct brw_sampler_state
+{
+
+ struct brw_ss0
+ {
+ GLuint shadow_function:3;
+ GLuint lod_bias:11;
+ GLuint min_filter:3;
+ GLuint mag_filter:3;
+ GLuint mip_filter:2;
+ GLuint base_level:5;
+ GLuint pad:1;
+ GLuint lod_preclamp:1;
+ GLuint default_color_mode:1;
+ GLuint pad0:1;
+ GLuint disable:1;
+ } ss0;
+
+ struct brw_ss1
+ {
+ GLuint r_wrap_mode:3;
+ GLuint t_wrap_mode:3;
+ GLuint s_wrap_mode:3;
+ GLuint pad:3;
+ GLuint max_lod:10;
+ GLuint min_lod:10;
+ } ss1;
+
+
+ struct brw_ss2
+ {
+ GLuint pad:5;
+ GLuint default_color_pointer:27;
+ } ss2;
+
+ struct brw_ss3
+ {
+ GLuint pad:19;
+ GLuint max_aniso:3;
+ GLuint chroma_key_mode:1;
+ GLuint chroma_key_index:2;
+ GLuint chroma_key_enable:1;
+ GLuint monochrome_filter_width:3;
+ GLuint monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct brw_clipper_viewport
+{
+ GLfloat xmin;
+ GLfloat xmax;
+ GLfloat ymin;
+ GLfloat ymax;
+};
+
+struct brw_cc_viewport
+{
+ GLfloat min_depth;
+ GLfloat max_depth;
+};
+
+struct brw_sf_viewport
+{
+ struct {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+ } viewport;
+
+ /* scissor coordinates are inclusive */
+ struct {
+ GLshort xmin;
+ GLshort ymin;
+ GLshort xmax;
+ GLshort ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state
+{
+ struct brw_surf_ss0 {
+ GLuint cube_pos_z:1;
+ GLuint cube_neg_z:1;
+ GLuint cube_pos_y:1;
+ GLuint cube_neg_y:1;
+ GLuint cube_pos_x:1;
+ GLuint cube_neg_x:1;
+ GLuint pad:4;
+ GLuint mipmap_layout_mode:1;
+ GLuint vert_line_stride_ofs:1;
+ GLuint vert_line_stride:1;
+ GLuint color_blend:1;
+ GLuint writedisable_blue:1;
+ GLuint writedisable_green:1;
+ GLuint writedisable_red:1;
+ GLuint writedisable_alpha:1;
+ GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */
+ GLuint data_return_format:1;
+ GLuint pad0:1;
+ GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
+ } ss0;
+
+ struct brw_surf_ss1 {
+ GLuint base_addr;
+ } ss1;
+
+ struct brw_surf_ss2 {
+ GLuint pad:2;
+ GLuint mip_count:4;
+ GLuint width:13;
+ GLuint height:13;
+ } ss2;
+
+ struct brw_surf_ss3 {
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad:1;
+ GLuint pitch:18;
+ GLuint depth:11;
+ } ss3;
+
+ struct brw_surf_ss4 {
+ GLuint multisample_position_palette_index:3;
+ GLuint pad1:1;
+ GLuint num_multisamples:3;
+ GLuint pad0:1;
+ GLuint render_target_view_extent:9;
+ GLuint min_array_elt:11;
+ GLuint min_lod:4;
+ } ss4;
+
+ struct brw_surf_ss5 {
+ GLuint pad1:16;
+ GLuint llc_mapping:1;
+ GLuint mlc_mapping:1;
+ GLuint gfdt:1;
+ GLuint gfdt_src:1;
+ GLuint y_offset:4;
+ GLuint pad0:1;
+ GLuint x_offset:7;
+ } ss5; /* New in G4X */
+
+};
+
+
+
+struct brw_vertex_buffer_state
+{
+ struct {
+ GLuint pitch:11;
+ GLuint pad:15;
+ GLuint access_type:1;
+ GLuint vb_index:5;
+ } vb0;
+
+ GLuint start_addr;
+ GLuint max_index;
+#if 1
+ GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17
+
+struct brw_vb_array_state {
+ struct header header;
+ struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state
+{
+ struct
+ {
+ GLuint src_offset:11;
+ GLuint pad:5;
+ GLuint src_format:9;
+ GLuint pad0:1;
+ GLuint valid:1;
+ GLuint vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ GLuint dst_offset:8;
+ GLuint pad:8;
+ GLuint vfcomponent3:4;
+ GLuint vfcomponent2:4;
+ GLuint vfcomponent1:4;
+ GLuint vfcomponent0:4;
+ } ve1;
+};
+
+#define BRW_VEP_MAX 18
+
+struct brw_vertex_element_packet {
+ struct header header;
+ struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction
+{
+ struct
+ {
+ GLuint opcode:7;
+ GLuint pad:1;
+ GLuint access_mode:1;
+ GLuint mask_control:1;
+ GLuint dependency_control:2;
+ GLuint compression_control:2;
+ GLuint thread_control:2;
+ GLuint predicate_control:4;
+ GLuint predicate_inverse:1;
+ GLuint execution_size:3;
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint pad0:2;
+ GLuint debug_control:1;
+ GLuint saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_subreg_nr:5;
+ GLuint dest_reg_nr:8;
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2; /* 0x00000c00 */
+ GLuint src1_reg_type:3; /* 0x00007000 */
+ GLuint pad:1;
+ GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
+ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_writemask:4;
+ GLuint dest_subreg_nr:1;
+ GLuint dest_reg_nr:8;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint pad0:6;
+ GLuint dest_writemask:4;
+ GLint dest_indirect_offset:6;
+ GLuint dest_subreg_nr:3;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ GLuint src0_subreg_nr:5;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } da1;
+
+ struct
+ {
+ GLint src0_indirect_offset:10;
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } ia1;
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLuint src0_subreg_nr:1;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2;
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } da16;
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLint src0_indirect_offset:6;
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2;
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } ia16;
+
+ struct
+ {
+ GLuint pad:26;
+ GLuint end_of_thread:1;
+ GLuint pad1:1;
+ GLuint sfid:4;
+ } send_igdng; /* for IGDNG only */
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ GLuint src1_subreg_nr:5;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint pad0:7;
+ } da1;
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLuint src1_subreg_nr:1;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint pad2:7;
+ } da16;
+
+ struct
+ {
+ GLint src1_indirect_offset:10;
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } ia1;
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLint src1_indirect_offset:6;
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint pad0:1;
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ GLint jump_count:16; /* note: signed */
+ GLuint pop_count:4;
+ GLuint pad0:12;
+ } if_else;
+
+ struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint pad0:8;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } math;
+
+ struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint snapshot:1;
+ GLuint pad0:10;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } math_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint return_format:2;
+ GLuint msg_type:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint simd_mode:2;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } sampler_igdng;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } urb_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:4;
+ GLuint msg_type:2;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_read_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_write;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_write_igdng;
+
+ struct {
+ GLuint pad:16;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } generic;
+
+ struct {
+ GLuint pad:19;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } generic_igdng;
+
+ GLint d;
+ GLuint ud;
+ float f;
+ } bits3;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
new file mode 100644
index 00000000000..cd40fc6d618
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -0,0 +1,1247 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_structs.h"
+#include "brw_structs_dump.h"
+
+void
+brw_dump_3d_control(const struct brw_3d_control *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable);
+ debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", (*ptr).header.wc_flush_enable);
+ debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable);
+ debug_printf("\t\t.header.operation = 0x%x\n", (*ptr).header.operation);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", (*ptr).dest.dest_addr_type);
+ debug_printf("\t\t.dest.dest_addr = 0x%x\n", (*ptr).dest.dest_addr);
+ debug_printf("\t\t.dword2 = 0x%x\n", (*ptr).dword2);
+ debug_printf("\t\t.dword3 = 0x%x\n", (*ptr).dword3);
+}
+
+void
+brw_dump_3d_primitive(const struct brw_3d_primitive *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.topology = 0x%x\n", (*ptr).header.topology);
+ debug_printf("\t\t.header.indexed = 0x%x\n", (*ptr).header.indexed);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.verts_per_instance = 0x%x\n", (*ptr).verts_per_instance);
+ debug_printf("\t\t.start_vert_location = 0x%x\n", (*ptr).start_vert_location);
+ debug_printf("\t\t.instance_count = 0x%x\n", (*ptr).instance_count);
+ debug_printf("\t\t.start_instance_location = 0x%x\n", (*ptr).start_instance_location);
+ debug_printf("\t\t.base_vert_location = 0x%x\n", (*ptr).base_vert_location);
+}
+
+void
+brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope);
+ debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias);
+ debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope);
+ debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias);
+}
+
+void
+brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.vs = 0x%x\n", (*ptr).vs);
+ debug_printf("\t\t.gs = 0x%x\n", (*ptr).gs);
+ debug_printf("\t\t.clp = 0x%x\n", (*ptr).clp);
+ debug_printf("\t\t.sf = 0x%x\n", (*ptr).sf);
+ debug_printf("\t\t.wm = 0x%x\n", (*ptr).wm);
+}
+
+void
+brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.blend_constant_color[0] = %f\n", (*ptr).blend_constant_color[0]);
+ debug_printf("\t\t.blend_constant_color[1] = %f\n", (*ptr).blend_constant_color[1]);
+ debug_printf("\t\t.blend_constant_color[2] = %f\n", (*ptr).blend_constant_color[2]);
+ debug_printf("\t\t.blend_constant_color[3] = %f\n", (*ptr).blend_constant_color[3]);
+}
+
+void
+brw_dump_cc0(const struct brw_cc0 *ptr)
+{
+ debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_pass_op);
+ debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_fail_op);
+ debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", (*ptr).bf_stencil_fail_op);
+ debug_printf("\t\t.bf_stencil_func = 0x%x\n", (*ptr).bf_stencil_func);
+ debug_printf("\t\t.bf_stencil_enable = 0x%x\n", (*ptr).bf_stencil_enable);
+ debug_printf("\t\t.stencil_write_enable = 0x%x\n", (*ptr).stencil_write_enable);
+ debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).stencil_pass_depth_pass_op);
+ debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).stencil_pass_depth_fail_op);
+ debug_printf("\t\t.stencil_fail_op = 0x%x\n", (*ptr).stencil_fail_op);
+ debug_printf("\t\t.stencil_func = 0x%x\n", (*ptr).stencil_func);
+ debug_printf("\t\t.stencil_enable = 0x%x\n", (*ptr).stencil_enable);
+}
+
+void
+brw_dump_cc1(const struct brw_cc1 *ptr)
+{
+ debug_printf("\t\t.bf_stencil_ref = 0x%x\n", (*ptr).bf_stencil_ref);
+ debug_printf("\t\t.stencil_write_mask = 0x%x\n", (*ptr).stencil_write_mask);
+ debug_printf("\t\t.stencil_test_mask = 0x%x\n", (*ptr).stencil_test_mask);
+ debug_printf("\t\t.stencil_ref = 0x%x\n", (*ptr).stencil_ref);
+}
+
+void
+brw_dump_cc2(const struct brw_cc2 *ptr)
+{
+ debug_printf("\t\t.logicop_enable = 0x%x\n", (*ptr).logicop_enable);
+ debug_printf("\t\t.depth_write_enable = 0x%x\n", (*ptr).depth_write_enable);
+ debug_printf("\t\t.depth_test_function = 0x%x\n", (*ptr).depth_test_function);
+ debug_printf("\t\t.depth_test = 0x%x\n", (*ptr).depth_test);
+ debug_printf("\t\t.bf_stencil_write_mask = 0x%x\n", (*ptr).bf_stencil_write_mask);
+ debug_printf("\t\t.bf_stencil_test_mask = 0x%x\n", (*ptr).bf_stencil_test_mask);
+}
+
+void
+brw_dump_cc3(const struct brw_cc3 *ptr)
+{
+ debug_printf("\t\t.alpha_test_func = 0x%x\n", (*ptr).alpha_test_func);
+ debug_printf("\t\t.alpha_test = 0x%x\n", (*ptr).alpha_test);
+ debug_printf("\t\t.blend_enable = 0x%x\n", (*ptr).blend_enable);
+ debug_printf("\t\t.ia_blend_enable = 0x%x\n", (*ptr).ia_blend_enable);
+ debug_printf("\t\t.alpha_test_format = 0x%x\n", (*ptr).alpha_test_format);
+}
+
+void
+brw_dump_cc4(const struct brw_cc4 *ptr)
+{
+ debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", (*ptr).cc_viewport_state_offset);
+}
+
+void
+brw_dump_cc5(const struct brw_cc5 *ptr)
+{
+ debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", (*ptr).ia_dest_blend_factor);
+ debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", (*ptr).ia_src_blend_factor);
+ debug_printf("\t\t.ia_blend_function = 0x%x\n", (*ptr).ia_blend_function);
+ debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable);
+ debug_printf("\t\t.logicop_func = 0x%x\n", (*ptr).logicop_func);
+ debug_printf("\t\t.dither_enable = 0x%x\n", (*ptr).dither_enable);
+}
+
+void
+brw_dump_cc6(const struct brw_cc6 *ptr)
+{
+ debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", (*ptr).clamp_post_alpha_blend);
+ debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", (*ptr).clamp_pre_alpha_blend);
+ debug_printf("\t\t.clamp_range = 0x%x\n", (*ptr).clamp_range);
+ debug_printf("\t\t.y_dither_offset = 0x%x\n", (*ptr).y_dither_offset);
+ debug_printf("\t\t.x_dither_offset = 0x%x\n", (*ptr).x_dither_offset);
+ debug_printf("\t\t.dest_blend_factor = 0x%x\n", (*ptr).dest_blend_factor);
+ debug_printf("\t\t.src_blend_factor = 0x%x\n", (*ptr).src_blend_factor);
+ debug_printf("\t\t.blend_function = 0x%x\n", (*ptr).blend_function);
+}
+
+void
+brw_dump_cc7(const struct brw_cc7 *ptr)
+{
+ debug_printf("\t\t.alpha_ref.f = %f\n", (*ptr).alpha_ref.f);
+ debug_printf("\t\t.alpha_ref.ub[0] = 0x%x\n", (*ptr).alpha_ref.ub[0]);
+ debug_printf("\t\t.alpha_ref.ub[1] = 0x%x\n", (*ptr).alpha_ref.ub[1]);
+ debug_printf("\t\t.alpha_ref.ub[2] = 0x%x\n", (*ptr).alpha_ref.ub[2]);
+ debug_printf("\t\t.alpha_ref.ub[3] = 0x%x\n", (*ptr).alpha_ref.ub[3]);
+}
+
+void
+brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr)
+{
+ debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_pass_op);
+ debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_fail_op);
+ debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_fail_op);
+ debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", (*ptr).cc0.bf_stencil_func);
+ debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", (*ptr).cc0.bf_stencil_enable);
+ debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", (*ptr).cc0.stencil_write_enable);
+ debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_pass_op);
+ debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_fail_op);
+ debug_printf("\t\t.cc0.stencil_fail_op = 0x%x\n", (*ptr).cc0.stencil_fail_op);
+ debug_printf("\t\t.cc0.stencil_func = 0x%x\n", (*ptr).cc0.stencil_func);
+ debug_printf("\t\t.cc0.stencil_enable = 0x%x\n", (*ptr).cc0.stencil_enable);
+ debug_printf("\t\t.cc1.bf_stencil_ref = 0x%x\n", (*ptr).cc1.bf_stencil_ref);
+ debug_printf("\t\t.cc1.stencil_write_mask = 0x%x\n", (*ptr).cc1.stencil_write_mask);
+ debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", (*ptr).cc1.stencil_test_mask);
+ debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", (*ptr).cc1.stencil_ref);
+ debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", (*ptr).cc2.logicop_enable);
+ debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", (*ptr).cc2.depth_write_enable);
+ debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", (*ptr).cc2.depth_test_function);
+ debug_printf("\t\t.cc2.depth_test = 0x%x\n", (*ptr).cc2.depth_test);
+ debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", (*ptr).cc2.bf_stencil_write_mask);
+ debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", (*ptr).cc2.bf_stencil_test_mask);
+ debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", (*ptr).cc3.alpha_test_func);
+ debug_printf("\t\t.cc3.alpha_test = 0x%x\n", (*ptr).cc3.alpha_test);
+ debug_printf("\t\t.cc3.blend_enable = 0x%x\n", (*ptr).cc3.blend_enable);
+ debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", (*ptr).cc3.ia_blend_enable);
+ debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", (*ptr).cc3.alpha_test_format);
+ debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", (*ptr).cc4.cc_viewport_state_offset);
+ debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", (*ptr).cc5.ia_dest_blend_factor);
+ debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", (*ptr).cc5.ia_src_blend_factor);
+ debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", (*ptr).cc5.ia_blend_function);
+ debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", (*ptr).cc5.statistics_enable);
+ debug_printf("\t\t.cc5.logicop_func = 0x%x\n", (*ptr).cc5.logicop_func);
+ debug_printf("\t\t.cc5.dither_enable = 0x%x\n", (*ptr).cc5.dither_enable);
+ debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_post_alpha_blend);
+ debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_pre_alpha_blend);
+ debug_printf("\t\t.cc6.clamp_range = 0x%x\n", (*ptr).cc6.clamp_range);
+ debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", (*ptr).cc6.y_dither_offset);
+ debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", (*ptr).cc6.x_dither_offset);
+ debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", (*ptr).cc6.dest_blend_factor);
+ debug_printf("\t\t.cc6.src_blend_factor = 0x%x\n", (*ptr).cc6.src_blend_factor);
+ debug_printf("\t\t.cc6.blend_function = 0x%x\n", (*ptr).cc6.blend_function);
+ debug_printf("\t\t.cc7.alpha_ref.f = %f\n", (*ptr).cc7.alpha_ref.f);
+ debug_printf("\t\t.cc7.alpha_ref.ub[0] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[0]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[1] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[1]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[2] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[2]);
+ debug_printf("\t\t.cc7.alpha_ref.ub[3] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[3]);
+}
+
+void
+brw_dump_cc_viewport(const struct brw_cc_viewport *ptr)
+{
+ debug_printf("\t\t.min_depth = %f\n", (*ptr).min_depth);
+ debug_printf("\t\t.max_depth = %f\n", (*ptr).max_depth);
+}
+
+void
+brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", (*ptr).thread4.gs_output_stats);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
+ debug_printf("\t\t.clip5.clip_mode = 0x%x\n", (*ptr).clip5.clip_mode);
+ debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", (*ptr).clip5.userclip_enable_flags);
+ debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", (*ptr).clip5.userclip_must_clip);
+ debug_printf("\t\t.clip5.negative_w_clip_test = 0x%x\n", (*ptr).clip5.negative_w_clip_test);
+ debug_printf("\t\t.clip5.guard_band_enable = 0x%x\n", (*ptr).clip5.guard_band_enable);
+ debug_printf("\t\t.clip5.viewport_z_clip_enable = 0x%x\n", (*ptr).clip5.viewport_z_clip_enable);
+ debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", (*ptr).clip5.viewport_xy_clip_enable);
+ debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", (*ptr).clip5.vertex_position_space);
+ debug_printf("\t\t.clip5.api_mode = 0x%x\n", (*ptr).clip5.api_mode);
+ debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", (*ptr).clip6.clipper_viewport_state_ptr);
+ debug_printf("\t\t.viewport_xmin = %f\n", (*ptr).viewport_xmin);
+ debug_printf("\t\t.viewport_xmax = %f\n", (*ptr).viewport_xmax);
+ debug_printf("\t\t.viewport_ymin = %f\n", (*ptr).viewport_ymin);
+ debug_printf("\t\t.viewport_ymax = %f\n", (*ptr).viewport_ymax);
+}
+
+void
+brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr)
+{
+ debug_printf("\t\t.xmin = %f\n", (*ptr).xmin);
+ debug_printf("\t\t.xmax = %f\n", (*ptr).xmax);
+ debug_printf("\t\t.ymin = %f\n", (*ptr).ymin);
+ debug_printf("\t\t.ymax = %f\n", (*ptr).ymax);
+}
+
+void
+brw_dump_constant_buffer(const struct brw_constant_buffer *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.valid = 0x%x\n", (*ptr).header.valid);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.buffer_length = 0x%x\n", (*ptr).bits0.buffer_length);
+ debug_printf("\t\t.bits0.buffer_address = 0x%x\n", (*ptr).bits0.buffer_address);
+}
+
+void
+brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", (*ptr).bits0.nr_urb_entries);
+ debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", (*ptr).bits0.urb_entry_size);
+}
+
+void
+brw_dump_depthbuffer(const struct brw_depthbuffer *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
+ debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch);
+ debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format);
+ debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode);
+ debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable);
+ debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk);
+ debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface);
+ debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type);
+ debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr);
+ debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout);
+ debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod);
+ debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width);
+ debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height);
+ debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element);
+ debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth);
+}
+
+void
+brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
+ debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch);
+ debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format);
+ debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode);
+ debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable);
+ debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk);
+ debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface);
+ debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type);
+ debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr);
+ debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout);
+ debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod);
+ debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width);
+ debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height);
+ debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element);
+ debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth);
+ debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", (*ptr).dword5.bits.xoffset);
+ debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", (*ptr).dword5.bits.yoffset);
+}
+
+void
+brw_dump_drawrect(const struct brw_drawrect *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.xmin = 0x%x\n", (*ptr).xmin);
+ debug_printf("\t\t.ymin = 0x%x\n", (*ptr).ymin);
+ debug_printf("\t\t.xmax = 0x%x\n", (*ptr).xmax);
+ debug_printf("\t\t.ymax = 0x%x\n", (*ptr).ymax);
+ debug_printf("\t\t.xorg = 0x%x\n", (*ptr).xorg);
+ debug_printf("\t\t.yorg = 0x%x\n", (*ptr).yorg);
+}
+
+void
+brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.depth_offset_clamp = %f\n", (*ptr).depth_offset_clamp);
+}
+
+void
+brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", (*ptr).thread4.rendering_enable);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
+ debug_printf("\t\t.gs5.sampler_count = 0x%x\n", (*ptr).gs5.sampler_count);
+ debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", (*ptr).gs5.sampler_state_pointer);
+ debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", (*ptr).gs6.max_vp_index);
+ debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", (*ptr).gs6.svbi_post_inc_value);
+ debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", (*ptr).gs6.svbi_post_inc_enable);
+ debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", (*ptr).gs6.svbi_payload);
+ debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", (*ptr).gs6.discard_adjaceny);
+ debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", (*ptr).gs6.reorder_enable);
+}
+
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr)
+{
+ debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
+ debug_printf("\t\t.header.bits.index_format = 0x%x\n", (*ptr).header.bits.index_format);
+ debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", (*ptr).header.bits.cut_index_enable);
+ debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
+ debug_printf("\t\t.buffer_start = 0x%x\n", (*ptr).buffer_start);
+ debug_printf("\t\t.buffer_end = 0x%x\n", (*ptr).buffer_end);
+}
+
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.pattern = 0x%x\n", (*ptr).bits0.pattern);
+ debug_printf("\t\t.bits1.repeat_count = 0x%x\n", (*ptr).bits1.repeat_count);
+ debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", (*ptr).bits1.inverse_repeat_count);
+}
+
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr)
+{
+ debug_printf("\t\t.flags = 0x%x\n", (*ptr).flags);
+ debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
+}
+
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable);
+ debug_printf("\t\t.header.texture_cache_flush_enable = 0x%x\n", (*ptr).header.texture_cache_flush_enable);
+ debug_printf("\t\t.header.indirect_state_pointers_disable = 0x%x\n", (*ptr).header.indirect_state_pointers_disable);
+ debug_printf("\t\t.header.instruction_state_cache_flush_enable = 0x%x\n", (*ptr).header.instruction_state_cache_flush_enable);
+ debug_printf("\t\t.header.write_cache_flush_enable = 0x%x\n", (*ptr).header.write_cache_flush_enable);
+ debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable);
+ debug_printf("\t\t.header.post_sync_operation = 0x%x\n", (*ptr).header.post_sync_operation);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", (*ptr).bits1.dest_addr_type);
+ debug_printf("\t\t.bits1.dest_addr = 0x%x\n", (*ptr).bits1.dest_addr);
+ debug_printf("\t\t.data0 = 0x%x\n", (*ptr).data0);
+ debug_printf("\t\t.data1 = 0x%x\n", (*ptr).data1);
+}
+
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr)
+{
+ debug_printf("\t\t.header.pipeline_select = 0x%x\n", (*ptr).header.pipeline_select);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+}
+
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.vs.offset = 0x%x\n", (*ptr).vs.offset);
+ debug_printf("\t\t.gs.enable = 0x%x\n", (*ptr).gs.enable);
+ debug_printf("\t\t.gs.offset = 0x%x\n", (*ptr).gs.offset);
+ debug_printf("\t\t.clp.enable = 0x%x\n", (*ptr).clp.enable);
+ debug_printf("\t\t.clp.offset = 0x%x\n", (*ptr).clp.offset);
+ debug_printf("\t\t.sf.offset = 0x%x\n", (*ptr).sf.offset);
+ debug_printf("\t\t.wm.offset = 0x%x\n", (*ptr).wm.offset);
+ debug_printf("\t\t.cc.offset = 0x%x\n", (*ptr).cc.offset);
+}
+
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.stipple[0] = 0x%x\n", (*ptr).stipple[0]);
+ debug_printf("\t\t.stipple[1] = 0x%x\n", (*ptr).stipple[1]);
+ debug_printf("\t\t.stipple[2] = 0x%x\n", (*ptr).stipple[2]);
+ debug_printf("\t\t.stipple[3] = 0x%x\n", (*ptr).stipple[3]);
+ debug_printf("\t\t.stipple[4] = 0x%x\n", (*ptr).stipple[4]);
+ debug_printf("\t\t.stipple[5] = 0x%x\n", (*ptr).stipple[5]);
+ debug_printf("\t\t.stipple[6] = 0x%x\n", (*ptr).stipple[6]);
+ debug_printf("\t\t.stipple[7] = 0x%x\n", (*ptr).stipple[7]);
+ debug_printf("\t\t.stipple[8] = 0x%x\n", (*ptr).stipple[8]);
+ debug_printf("\t\t.stipple[9] = 0x%x\n", (*ptr).stipple[9]);
+ debug_printf("\t\t.stipple[10] = 0x%x\n", (*ptr).stipple[10]);
+ debug_printf("\t\t.stipple[11] = 0x%x\n", (*ptr).stipple[11]);
+ debug_printf("\t\t.stipple[12] = 0x%x\n", (*ptr).stipple[12]);
+ debug_printf("\t\t.stipple[13] = 0x%x\n", (*ptr).stipple[13]);
+ debug_printf("\t\t.stipple[14] = 0x%x\n", (*ptr).stipple[14]);
+ debug_printf("\t\t.stipple[15] = 0x%x\n", (*ptr).stipple[15]);
+ debug_printf("\t\t.stipple[16] = 0x%x\n", (*ptr).stipple[16]);
+ debug_printf("\t\t.stipple[17] = 0x%x\n", (*ptr).stipple[17]);
+ debug_printf("\t\t.stipple[18] = 0x%x\n", (*ptr).stipple[18]);
+ debug_printf("\t\t.stipple[19] = 0x%x\n", (*ptr).stipple[19]);
+ debug_printf("\t\t.stipple[20] = 0x%x\n", (*ptr).stipple[20]);
+ debug_printf("\t\t.stipple[21] = 0x%x\n", (*ptr).stipple[21]);
+ debug_printf("\t\t.stipple[22] = 0x%x\n", (*ptr).stipple[22]);
+ debug_printf("\t\t.stipple[23] = 0x%x\n", (*ptr).stipple[23]);
+ debug_printf("\t\t.stipple[24] = 0x%x\n", (*ptr).stipple[24]);
+ debug_printf("\t\t.stipple[25] = 0x%x\n", (*ptr).stipple[25]);
+ debug_printf("\t\t.stipple[26] = 0x%x\n", (*ptr).stipple[26]);
+ debug_printf("\t\t.stipple[27] = 0x%x\n", (*ptr).stipple[27]);
+ debug_printf("\t\t.stipple[28] = 0x%x\n", (*ptr).stipple[28]);
+ debug_printf("\t\t.stipple[29] = 0x%x\n", (*ptr).stipple[29]);
+ debug_printf("\t\t.stipple[30] = 0x%x\n", (*ptr).stipple[30]);
+ debug_printf("\t\t.stipple[31] = 0x%x\n", (*ptr).stipple[31]);
+}
+
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.y_offset = 0x%x\n", (*ptr).bits0.y_offset);
+ debug_printf("\t\t.bits0.x_offset = 0x%x\n", (*ptr).bits0.x_offset);
+}
+
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr)
+{
+ debug_printf("\t\t.color[0] = %f\n", (*ptr).color[0]);
+ debug_printf("\t\t.color[1] = %f\n", (*ptr).color[1]);
+ debug_printf("\t\t.color[2] = %f\n", (*ptr).color[2]);
+ debug_printf("\t\t.color[3] = %f\n", (*ptr).color[3]);
+}
+
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr)
+{
+ debug_printf("\t\t.ss0.shadow_function = 0x%x\n", (*ptr).ss0.shadow_function);
+ debug_printf("\t\t.ss0.lod_bias = 0x%x\n", (*ptr).ss0.lod_bias);
+ debug_printf("\t\t.ss0.min_filter = 0x%x\n", (*ptr).ss0.min_filter);
+ debug_printf("\t\t.ss0.mag_filter = 0x%x\n", (*ptr).ss0.mag_filter);
+ debug_printf("\t\t.ss0.mip_filter = 0x%x\n", (*ptr).ss0.mip_filter);
+ debug_printf("\t\t.ss0.base_level = 0x%x\n", (*ptr).ss0.base_level);
+ debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", (*ptr).ss0.lod_preclamp);
+ debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", (*ptr).ss0.default_color_mode);
+ debug_printf("\t\t.ss0.disable = 0x%x\n", (*ptr).ss0.disable);
+ debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", (*ptr).ss1.r_wrap_mode);
+ debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", (*ptr).ss1.t_wrap_mode);
+ debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", (*ptr).ss1.s_wrap_mode);
+ debug_printf("\t\t.ss1.max_lod = 0x%x\n", (*ptr).ss1.max_lod);
+ debug_printf("\t\t.ss1.min_lod = 0x%x\n", (*ptr).ss1.min_lod);
+ debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", (*ptr).ss2.default_color_pointer);
+ debug_printf("\t\t.ss3.max_aniso = 0x%x\n", (*ptr).ss3.max_aniso);
+ debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", (*ptr).ss3.chroma_key_mode);
+ debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", (*ptr).ss3.chroma_key_index);
+ debug_printf("\t\t.ss3.chroma_key_enable = 0x%x\n", (*ptr).ss3.chroma_key_enable);
+ debug_printf("\t\t.ss3.monochrome_filter_width = 0x%x\n", (*ptr).ss3.monochrome_filter_width);
+ debug_printf("\t\t.ss3.monochrome_filter_height = 0x%x\n", (*ptr).ss3.monochrome_filter_height);
+}
+
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
+ debug_printf("\t\t.sf5.front_winding = 0x%x\n", (*ptr).sf5.front_winding);
+ debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", (*ptr).sf5.viewport_transform);
+ debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", (*ptr).sf5.sf_viewport_state_offset);
+ debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", (*ptr).sf6.dest_org_vbias);
+ debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", (*ptr).sf6.dest_org_hbias);
+ debug_printf("\t\t.sf6.scissor = 0x%x\n", (*ptr).sf6.scissor);
+ debug_printf("\t\t.sf6.disable_2x2_trifilter = 0x%x\n", (*ptr).sf6.disable_2x2_trifilter);
+ debug_printf("\t\t.sf6.disable_zero_pix_trifilter = 0x%x\n", (*ptr).sf6.disable_zero_pix_trifilter);
+ debug_printf("\t\t.sf6.point_rast_rule = 0x%x\n", (*ptr).sf6.point_rast_rule);
+ debug_printf("\t\t.sf6.line_endcap_aa_region_width = 0x%x\n", (*ptr).sf6.line_endcap_aa_region_width);
+ debug_printf("\t\t.sf6.line_width = 0x%x\n", (*ptr).sf6.line_width);
+ debug_printf("\t\t.sf6.fast_scissor_disable = 0x%x\n", (*ptr).sf6.fast_scissor_disable);
+ debug_printf("\t\t.sf6.cull_mode = 0x%x\n", (*ptr).sf6.cull_mode);
+ debug_printf("\t\t.sf6.aa_enable = 0x%x\n", (*ptr).sf6.aa_enable);
+ debug_printf("\t\t.sf7.point_size = 0x%x\n", (*ptr).sf7.point_size);
+ debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", (*ptr).sf7.use_point_size_state);
+ debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", (*ptr).sf7.subpixel_precision);
+ debug_printf("\t\t.sf7.sprite_point = 0x%x\n", (*ptr).sf7.sprite_point);
+ debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", (*ptr).sf7.aa_line_distance_mode);
+ debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", (*ptr).sf7.trifan_pv);
+ debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", (*ptr).sf7.linestrip_pv);
+ debug_printf("\t\t.sf7.tristrip_pv = 0x%x\n", (*ptr).sf7.tristrip_pv);
+ debug_printf("\t\t.sf7.line_last_pixel_enable = 0x%x\n", (*ptr).sf7.line_last_pixel_enable);
+}
+
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr)
+{
+ debug_printf("\t\t.viewport.m00 = %f\n", (*ptr).viewport.m00);
+ debug_printf("\t\t.viewport.m11 = %f\n", (*ptr).viewport.m11);
+ debug_printf("\t\t.viewport.m22 = %f\n", (*ptr).viewport.m22);
+ debug_printf("\t\t.viewport.m30 = %f\n", (*ptr).viewport.m30);
+ debug_printf("\t\t.viewport.m31 = %f\n", (*ptr).viewport.m31);
+ debug_printf("\t\t.viewport.m32 = %f\n", (*ptr).viewport.m32);
+ debug_printf("\t\t.scissor.xmin = 0x%x\n", (*ptr).scissor.xmin);
+ debug_printf("\t\t.scissor.ymin = 0x%x\n", (*ptr).scissor.ymin);
+ debug_printf("\t\t.scissor.xmax = 0x%x\n", (*ptr).scissor.xmax);
+ debug_printf("\t\t.scissor.ymax = 0x%x\n", (*ptr).scissor.ymax);
+}
+
+void
+brw_dump_ss0(const struct brw_ss0 *ptr)
+{
+ debug_printf("\t\t.shadow_function = 0x%x\n", (*ptr).shadow_function);
+ debug_printf("\t\t.lod_bias = 0x%x\n", (*ptr).lod_bias);
+ debug_printf("\t\t.min_filter = 0x%x\n", (*ptr).min_filter);
+ debug_printf("\t\t.mag_filter = 0x%x\n", (*ptr).mag_filter);
+ debug_printf("\t\t.mip_filter = 0x%x\n", (*ptr).mip_filter);
+ debug_printf("\t\t.base_level = 0x%x\n", (*ptr).base_level);
+ debug_printf("\t\t.lod_preclamp = 0x%x\n", (*ptr).lod_preclamp);
+ debug_printf("\t\t.default_color_mode = 0x%x\n", (*ptr).default_color_mode);
+ debug_printf("\t\t.disable = 0x%x\n", (*ptr).disable);
+}
+
+void
+brw_dump_ss1(const struct brw_ss1 *ptr)
+{
+ debug_printf("\t\t.r_wrap_mode = 0x%x\n", (*ptr).r_wrap_mode);
+ debug_printf("\t\t.t_wrap_mode = 0x%x\n", (*ptr).t_wrap_mode);
+ debug_printf("\t\t.s_wrap_mode = 0x%x\n", (*ptr).s_wrap_mode);
+ debug_printf("\t\t.max_lod = 0x%x\n", (*ptr).max_lod);
+ debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod);
+}
+
+void
+brw_dump_ss2(const struct brw_ss2 *ptr)
+{
+ debug_printf("\t\t.default_color_pointer = 0x%x\n", (*ptr).default_color_pointer);
+}
+
+void
+brw_dump_ss3(const struct brw_ss3 *ptr)
+{
+ debug_printf("\t\t.max_aniso = 0x%x\n", (*ptr).max_aniso);
+ debug_printf("\t\t.chroma_key_mode = 0x%x\n", (*ptr).chroma_key_mode);
+ debug_printf("\t\t.chroma_key_index = 0x%x\n", (*ptr).chroma_key_index);
+ debug_printf("\t\t.chroma_key_enable = 0x%x\n", (*ptr).chroma_key_enable);
+ debug_printf("\t\t.monochrome_filter_width = 0x%x\n", (*ptr).monochrome_filter_width);
+ debug_printf("\t\t.monochrome_filter_height = 0x%x\n", (*ptr).monochrome_filter_height);
+}
+
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.modify_enable = 0x%x\n", (*ptr).bits0.modify_enable);
+ debug_printf("\t\t.bits0.general_state_address = 0x%x\n", (*ptr).bits0.general_state_address);
+ debug_printf("\t\t.bits1.modify_enable = 0x%x\n", (*ptr).bits1.modify_enable);
+ debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", (*ptr).bits1.surface_state_address);
+ debug_printf("\t\t.bits2.modify_enable = 0x%x\n", (*ptr).bits2.modify_enable);
+ debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", (*ptr).bits2.indirect_object_state_address);
+ debug_printf("\t\t.bits3.modify_enable = 0x%x\n", (*ptr).bits3.modify_enable);
+ debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", (*ptr).bits3.general_state_upper_bound);
+ debug_printf("\t\t.bits4.modify_enable = 0x%x\n", (*ptr).bits4.modify_enable);
+ debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", (*ptr).bits4.indirect_object_state_upper_bound);
+}
+
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", (*ptr).bits0.prefetch_count);
+ debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", (*ptr).bits0.prefetch_pointer);
+}
+
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
+{
+ debug_printf("\t\t.cube_pos_z = 0x%x\n", (*ptr).cube_pos_z);
+ debug_printf("\t\t.cube_neg_z = 0x%x\n", (*ptr).cube_neg_z);
+ debug_printf("\t\t.cube_pos_y = 0x%x\n", (*ptr).cube_pos_y);
+ debug_printf("\t\t.cube_neg_y = 0x%x\n", (*ptr).cube_neg_y);
+ debug_printf("\t\t.cube_pos_x = 0x%x\n", (*ptr).cube_pos_x);
+ debug_printf("\t\t.cube_neg_x = 0x%x\n", (*ptr).cube_neg_x);
+ debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", (*ptr).mipmap_layout_mode);
+ debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", (*ptr).vert_line_stride_ofs);
+ debug_printf("\t\t.vert_line_stride = 0x%x\n", (*ptr).vert_line_stride);
+ debug_printf("\t\t.color_blend = 0x%x\n", (*ptr).color_blend);
+ debug_printf("\t\t.writedisable_blue = 0x%x\n", (*ptr).writedisable_blue);
+ debug_printf("\t\t.writedisable_green = 0x%x\n", (*ptr).writedisable_green);
+ debug_printf("\t\t.writedisable_red = 0x%x\n", (*ptr).writedisable_red);
+ debug_printf("\t\t.writedisable_alpha = 0x%x\n", (*ptr).writedisable_alpha);
+ debug_printf("\t\t.surface_format = 0x%x\n", (*ptr).surface_format);
+ debug_printf("\t\t.data_return_format = 0x%x\n", (*ptr).data_return_format);
+ debug_printf("\t\t.surface_type = 0x%x\n", (*ptr).surface_type);
+}
+
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr)
+{
+ debug_printf("\t\t.base_addr = 0x%x\n", (*ptr).base_addr);
+}
+
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr)
+{
+ debug_printf("\t\t.mip_count = 0x%x\n", (*ptr).mip_count);
+ debug_printf("\t\t.width = 0x%x\n", (*ptr).width);
+ debug_printf("\t\t.height = 0x%x\n", (*ptr).height);
+}
+
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr)
+{
+ debug_printf("\t\t.tile_walk = 0x%x\n", (*ptr).tile_walk);
+ debug_printf("\t\t.tiled_surface = 0x%x\n", (*ptr).tiled_surface);
+ debug_printf("\t\t.pitch = 0x%x\n", (*ptr).pitch);
+ debug_printf("\t\t.depth = 0x%x\n", (*ptr).depth);
+}
+
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
+{
+ debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", (*ptr).multisample_position_palette_index);
+ debug_printf("\t\t.num_multisamples = 0x%x\n", (*ptr).num_multisamples);
+ debug_printf("\t\t.render_target_view_extent = 0x%x\n", (*ptr).render_target_view_extent);
+ debug_printf("\t\t.min_array_elt = 0x%x\n", (*ptr).min_array_elt);
+ debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod);
+}
+
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr)
+{
+ debug_printf("\t\t.llc_mapping = 0x%x\n", (*ptr).llc_mapping);
+ debug_printf("\t\t.mlc_mapping = 0x%x\n", (*ptr).mlc_mapping);
+ debug_printf("\t\t.gfdt = 0x%x\n", (*ptr).gfdt);
+ debug_printf("\t\t.gfdt_src = 0x%x\n", (*ptr).gfdt_src);
+ debug_printf("\t\t.y_offset = 0x%x\n", (*ptr).y_offset);
+ debug_printf("\t\t.x_offset = 0x%x\n", (*ptr).x_offset);
+}
+
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr)
+{
+ debug_printf("\t\t.ss0.cube_pos_z = 0x%x\n", (*ptr).ss0.cube_pos_z);
+ debug_printf("\t\t.ss0.cube_neg_z = 0x%x\n", (*ptr).ss0.cube_neg_z);
+ debug_printf("\t\t.ss0.cube_pos_y = 0x%x\n", (*ptr).ss0.cube_pos_y);
+ debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", (*ptr).ss0.cube_neg_y);
+ debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", (*ptr).ss0.cube_pos_x);
+ debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", (*ptr).ss0.cube_neg_x);
+ debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", (*ptr).ss0.mipmap_layout_mode);
+ debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", (*ptr).ss0.vert_line_stride_ofs);
+ debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", (*ptr).ss0.vert_line_stride);
+ debug_printf("\t\t.ss0.color_blend = 0x%x\n", (*ptr).ss0.color_blend);
+ debug_printf("\t\t.ss0.writedisable_blue = 0x%x\n", (*ptr).ss0.writedisable_blue);
+ debug_printf("\t\t.ss0.writedisable_green = 0x%x\n", (*ptr).ss0.writedisable_green);
+ debug_printf("\t\t.ss0.writedisable_red = 0x%x\n", (*ptr).ss0.writedisable_red);
+ debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", (*ptr).ss0.writedisable_alpha);
+ debug_printf("\t\t.ss0.surface_format = 0x%x\n", (*ptr).ss0.surface_format);
+ debug_printf("\t\t.ss0.data_return_format = 0x%x\n", (*ptr).ss0.data_return_format);
+ debug_printf("\t\t.ss0.surface_type = 0x%x\n", (*ptr).ss0.surface_type);
+ debug_printf("\t\t.ss1.base_addr = 0x%x\n", (*ptr).ss1.base_addr);
+ debug_printf("\t\t.ss2.mip_count = 0x%x\n", (*ptr).ss2.mip_count);
+ debug_printf("\t\t.ss2.width = 0x%x\n", (*ptr).ss2.width);
+ debug_printf("\t\t.ss2.height = 0x%x\n", (*ptr).ss2.height);
+ debug_printf("\t\t.ss3.tile_walk = 0x%x\n", (*ptr).ss3.tile_walk);
+ debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", (*ptr).ss3.tiled_surface);
+ debug_printf("\t\t.ss3.pitch = 0x%x\n", (*ptr).ss3.pitch);
+ debug_printf("\t\t.ss3.depth = 0x%x\n", (*ptr).ss3.depth);
+ debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", (*ptr).ss4.multisample_position_palette_index);
+ debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", (*ptr).ss4.num_multisamples);
+ debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", (*ptr).ss4.render_target_view_extent);
+ debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", (*ptr).ss4.min_array_elt);
+ debug_printf("\t\t.ss4.min_lod = 0x%x\n", (*ptr).ss4.min_lod);
+ debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", (*ptr).ss5.llc_mapping);
+ debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", (*ptr).ss5.mlc_mapping);
+ debug_printf("\t\t.ss5.gfdt = 0x%x\n", (*ptr).ss5.gfdt);
+ debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", (*ptr).ss5.gfdt_src);
+ debug_printf("\t\t.ss5.y_offset = 0x%x\n", (*ptr).ss5.y_offset);
+ debug_printf("\t\t.ss5.x_offset = 0x%x\n", (*ptr).ss5.x_offset);
+}
+
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", (*ptr).bits0.system_instruction_pointer);
+}
+
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.vs_realloc = 0x%x\n", (*ptr).header.vs_realloc);
+ debug_printf("\t\t.header.gs_realloc = 0x%x\n", (*ptr).header.gs_realloc);
+ debug_printf("\t\t.header.clp_realloc = 0x%x\n", (*ptr).header.clp_realloc);
+ debug_printf("\t\t.header.sf_realloc = 0x%x\n", (*ptr).header.sf_realloc);
+ debug_printf("\t\t.header.vfe_realloc = 0x%x\n", (*ptr).header.vfe_realloc);
+ debug_printf("\t\t.header.cs_realloc = 0x%x\n", (*ptr).header.cs_realloc);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.bits0.vs_fence = 0x%x\n", (*ptr).bits0.vs_fence);
+ debug_printf("\t\t.bits0.gs_fence = 0x%x\n", (*ptr).bits0.gs_fence);
+ debug_printf("\t\t.bits0.clp_fence = 0x%x\n", (*ptr).bits0.clp_fence);
+ debug_printf("\t\t.bits1.sf_fence = 0x%x\n", (*ptr).bits1.sf_fence);
+ debug_printf("\t\t.bits1.vf_fence = 0x%x\n", (*ptr).bits1.vf_fence);
+ debug_printf("\t\t.bits1.cs_fence = 0x%x\n", (*ptr).bits1.cs_fence);
+}
+
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr)
+{
+ debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
+ debug_printf("\t\t.offset = 0x%x\n", (*ptr).offset);
+ debug_printf("\t\t.swizzle_control = 0x%x\n", (*ptr).swizzle_control);
+ debug_printf("\t\t.allocate = 0x%x\n", (*ptr).allocate);
+ debug_printf("\t\t.used = 0x%x\n", (*ptr).used);
+ debug_printf("\t\t.complete = 0x%x\n", (*ptr).complete);
+ debug_printf("\t\t.response_length = 0x%x\n", (*ptr).response_length);
+ debug_printf("\t\t.msg_length = 0x%x\n", (*ptr).msg_length);
+ debug_printf("\t\t.msg_target = 0x%x\n", (*ptr).msg_target);
+ debug_printf("\t\t.end_of_thread = 0x%x\n", (*ptr).end_of_thread);
+}
+
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.vb[0].vb0.pitch = 0x%x\n", (*ptr).vb[0].vb0.pitch);
+ debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type);
+ debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index);
+ debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr);
+ debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index);
+ debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate);
+ debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch);
+ debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type);
+ debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", (*ptr).vb[1].vb0.vb_index);
+ debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr);
+ debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index);
+ debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate);
+ debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch);
+ debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type);
+ debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index);
+ debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr);
+ debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index);
+ debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate);
+ debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch);
+ debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type);
+ debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index);
+ debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr);
+ debug_printf("\t\t.vb[3].max_index = 0x%x\n", (*ptr).vb[3].max_index);
+ debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate);
+ debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch);
+ debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type);
+ debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index);
+ debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr);
+ debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index);
+ debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate);
+ debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", (*ptr).vb[5].vb0.pitch);
+ debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type);
+ debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index);
+ debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr);
+ debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index);
+ debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate);
+ debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch);
+ debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type);
+ debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index);
+ debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr);
+ debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index);
+ debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate);
+ debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch);
+ debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type);
+ debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index);
+ debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr);
+ debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index);
+ debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate);
+ debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch);
+ debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type);
+ debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index);
+ debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr);
+ debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index);
+ debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate);
+ debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch);
+ debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type);
+ debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index);
+ debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr);
+ debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index);
+ debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate);
+ debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch);
+ debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type);
+ debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index);
+ debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr);
+ debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index);
+ debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate);
+ debug_printf("\t\t.vb[11].vb0.pitch = 0x%x\n", (*ptr).vb[11].vb0.pitch);
+ debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type);
+ debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index);
+ debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr);
+ debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index);
+ debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate);
+ debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch);
+ debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type);
+ debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index);
+ debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr);
+ debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index);
+ debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate);
+ debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch);
+ debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type);
+ debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index);
+ debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr);
+ debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index);
+ debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate);
+ debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch);
+ debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type);
+ debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index);
+ debug_printf("\t\t.vb[14].start_addr = 0x%x\n", (*ptr).vb[14].start_addr);
+ debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index);
+ debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate);
+ debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch);
+ debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type);
+ debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index);
+ debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr);
+ debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index);
+ debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate);
+ debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch);
+ debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type);
+ debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index);
+ debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr);
+ debug_printf("\t\t.vb[16].max_index = 0x%x\n", (*ptr).vb[16].max_index);
+ debug_printf("\t\t.vb[16].instance_data_step_rate = 0x%x\n", (*ptr).vb[16].instance_data_step_rate);
+}
+
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr)
+{
+ debug_printf("\t\t.vb0.pitch = 0x%x\n", (*ptr).vb0.pitch);
+ debug_printf("\t\t.vb0.access_type = 0x%x\n", (*ptr).vb0.access_type);
+ debug_printf("\t\t.vb0.vb_index = 0x%x\n", (*ptr).vb0.vb_index);
+ debug_printf("\t\t.start_addr = 0x%x\n", (*ptr).start_addr);
+ debug_printf("\t\t.max_index = 0x%x\n", (*ptr).max_index);
+ debug_printf("\t\t.instance_data_step_rate = 0x%x\n", (*ptr).instance_data_step_rate);
+}
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr)
+{
+ debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+ debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+ debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset);
+ debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", (*ptr).ve[0].ve0.src_format);
+ debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid);
+ debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[0].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset);
+ debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format);
+ debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid);
+ debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset);
+ debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format);
+ debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid);
+ debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset);
+ debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format);
+ debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid);
+ debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset);
+ debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format);
+ debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", (*ptr).ve[4].ve0.valid);
+ debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset);
+ debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format);
+ debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid);
+ debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset);
+ debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format);
+ debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid);
+ debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", (*ptr).ve[6].ve1.dst_offset);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset);
+ debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format);
+ debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid);
+ debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset);
+ debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format);
+ debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid);
+ debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[8].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", (*ptr).ve[8].ve1.dst_offset);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset);
+ debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format);
+ debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid);
+ debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset);
+ debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format);
+ debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid);
+ debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", (*ptr).ve[10].ve1.dst_offset);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset);
+ debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format);
+ debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid);
+ debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset);
+ debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format);
+ debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid);
+ debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", (*ptr).ve[12].ve1.dst_offset);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset);
+ debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format);
+ debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid);
+ debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset);
+ debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format);
+ debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid);
+ debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset);
+ debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format);
+ debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid);
+ debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset);
+ debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format);
+ debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid);
+ debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[16].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0);
+ debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset);
+ debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format);
+ debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid);
+ debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1);
+ debug_printf("\t\t.ve[17].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent0);
+}
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr)
+{
+ debug_printf("\t\t.ve0.src_offset = 0x%x\n", (*ptr).ve0.src_offset);
+ debug_printf("\t\t.ve0.src_format = 0x%x\n", (*ptr).ve0.src_format);
+ debug_printf("\t\t.ve0.valid = 0x%x\n", (*ptr).ve0.valid);
+ debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve0.vertex_buffer_index);
+ debug_printf("\t\t.ve1.dst_offset = 0x%x\n", (*ptr).ve1.dst_offset);
+ debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", (*ptr).ve1.vfcomponent3);
+ debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", (*ptr).ve1.vfcomponent2);
+ debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", (*ptr).ve1.vfcomponent1);
+ debug_printf("\t\t.ve1.vfcomponent0 = 0x%x\n", (*ptr).ve1.vfcomponent0);
+}
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr)
+{
+ debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable);
+ debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
+}
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
+ debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
+ debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
+ debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
+ debug_printf("\t\t.vs5.sampler_count = 0x%x\n", (*ptr).vs5.sampler_count);
+ debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", (*ptr).vs5.sampler_state_pointer);
+ debug_printf("\t\t.vs6.vs_enable = 0x%x\n", (*ptr).vs6.vs_enable);
+ debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", (*ptr).vs6.vert_cache_disable);
+}
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
+{
+ debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+ debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+ debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
+ debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+ debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+ debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
+ debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+ debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
+ debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+ debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+ debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+ debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+ debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+ debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+ debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+ debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+ debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+ debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+ debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+ debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable);
+ debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear);
+ debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count);
+ debug_printf("\t\t.wm4.sampler_state_pointer = 0x%x\n", (*ptr).wm4.sampler_state_pointer);
+ debug_printf("\t\t.wm5.enable_8_pix = 0x%x\n", (*ptr).wm5.enable_8_pix);
+ debug_printf("\t\t.wm5.enable_16_pix = 0x%x\n", (*ptr).wm5.enable_16_pix);
+ debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix);
+ debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", (*ptr).wm5.enable_con_32_pix);
+ debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix);
+ debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias);
+ debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple);
+ debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset);
+ debug_printf("\t\t.wm5.polygon_stipple = 0x%x\n", (*ptr).wm5.polygon_stipple);
+ debug_printf("\t\t.wm5.line_aa_region_width = 0x%x\n", (*ptr).wm5.line_aa_region_width);
+ debug_printf("\t\t.wm5.line_endcap_aa_region_width = 0x%x\n", (*ptr).wm5.line_endcap_aa_region_width);
+ debug_printf("\t\t.wm5.early_depth_test = 0x%x\n", (*ptr).wm5.early_depth_test);
+ debug_printf("\t\t.wm5.thread_dispatch_enable = 0x%x\n", (*ptr).wm5.thread_dispatch_enable);
+ debug_printf("\t\t.wm5.program_uses_depth = 0x%x\n", (*ptr).wm5.program_uses_depth);
+ debug_printf("\t\t.wm5.program_computes_depth = 0x%x\n", (*ptr).wm5.program_computes_depth);
+ debug_printf("\t\t.wm5.program_uses_killpixel = 0x%x\n", (*ptr).wm5.program_uses_killpixel);
+ debug_printf("\t\t.wm5.legacy_line_rast = 0x%x\n", (*ptr).wm5.legacy_line_rast);
+ debug_printf("\t\t.wm5.transposed_urb_read_enable = 0x%x\n", (*ptr).wm5.transposed_urb_read_enable);
+ debug_printf("\t\t.wm5.max_threads = 0x%x\n", (*ptr).wm5.max_threads);
+ debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant);
+ debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale);
+ debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1);
+ debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1);
+ debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", (*ptr).wm9.grf_reg_count_2);
+ debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2);
+ debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3);
+ debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3);
+}
+
diff --git a/src/gallium/drivers/i965/brw_structs_dump.h b/src/gallium/drivers/i965/brw_structs_dump.h
new file mode 100644
index 00000000000..7c02dbfe33f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.h
@@ -0,0 +1,276 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#ifndef BRW_STRUCTS_DUMP_H
+#define BRW_STRUCTS_DUMP_H
+
+struct brw_3d_control;
+struct brw_3d_primitive;
+struct brw_aa_line_parameters;
+struct brw_binding_table_pointers;
+struct brw_blend_constant_color;
+struct brw_cc0;
+struct brw_cc1;
+struct brw_cc2;
+struct brw_cc3;
+struct brw_cc4;
+struct brw_cc5;
+struct brw_cc6;
+struct brw_cc7;
+struct brw_cc_unit_state;
+struct brw_cc_viewport;
+struct brw_clip_unit_state;
+struct brw_clipper_viewport;
+struct brw_constant_buffer;
+struct brw_cs_urb_state;
+struct brw_depthbuffer;
+struct brw_depthbuffer_g4x;
+struct brw_drawrect;
+struct brw_global_depth_offset_clamp;
+struct brw_gs_unit_state;
+struct brw_indexbuffer;
+struct brw_line_stipple;
+struct brw_mi_flush;
+struct brw_pipe_control;
+struct brw_pipeline_select;
+struct brw_pipelined_state_pointers;
+struct brw_polygon_stipple;
+struct brw_polygon_stipple_offset;
+struct brw_sampler_default_color;
+struct brw_sampler_state;
+struct brw_sf_unit_state;
+struct brw_sf_viewport;
+struct brw_ss0;
+struct brw_ss1;
+struct brw_ss2;
+struct brw_ss3;
+struct brw_state_base_address;
+struct brw_state_prefetch;
+struct brw_surf_ss0;
+struct brw_surf_ss1;
+struct brw_surf_ss2;
+struct brw_surf_ss3;
+struct brw_surf_ss4;
+struct brw_surf_ss5;
+struct brw_surface_state;
+struct brw_system_instruction_pointer;
+struct brw_urb_fence;
+struct brw_urb_immediate;
+struct brw_vb_array_state;
+struct brw_vertex_buffer_state;
+struct brw_vertex_element_packet;
+struct brw_vertex_element_state;
+struct brw_vf_statistics;
+struct brw_vs_unit_state;
+struct brw_wm_unit_state;
+
+void
+brw_dump_3d_control(const struct brw_3d_control *ptr);
+
+void
+brw_dump_3d_primitive(const struct brw_3d_primitive *ptr);
+
+void
+brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr);
+
+void
+brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr);
+
+void
+brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr);
+
+void
+brw_dump_cc0(const struct brw_cc0 *ptr);
+
+void
+brw_dump_cc1(const struct brw_cc1 *ptr);
+
+void
+brw_dump_cc2(const struct brw_cc2 *ptr);
+
+void
+brw_dump_cc3(const struct brw_cc3 *ptr);
+
+void
+brw_dump_cc4(const struct brw_cc4 *ptr);
+
+void
+brw_dump_cc5(const struct brw_cc5 *ptr);
+
+void
+brw_dump_cc6(const struct brw_cc6 *ptr);
+
+void
+brw_dump_cc7(const struct brw_cc7 *ptr);
+
+void
+brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr);
+
+void
+brw_dump_cc_viewport(const struct brw_cc_viewport *ptr);
+
+void
+brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr);
+
+void
+brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr);
+
+void
+brw_dump_constant_buffer(const struct brw_constant_buffer *ptr);
+
+void
+brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr);
+
+void
+brw_dump_depthbuffer(const struct brw_depthbuffer *ptr);
+
+void
+brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr);
+
+void
+brw_dump_drawrect(const struct brw_drawrect *ptr);
+
+void
+brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr);
+
+void
+brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr);
+
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr);
+
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr);
+
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr);
+
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr);
+
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr);
+
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr);
+
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr);
+
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr);
+
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr);
+
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr);
+
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr);
+
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr);
+
+void
+brw_dump_ss0(const struct brw_ss0 *ptr);
+
+void
+brw_dump_ss1(const struct brw_ss1 *ptr);
+
+void
+brw_dump_ss2(const struct brw_ss2 *ptr);
+
+void
+brw_dump_ss3(const struct brw_ss3 *ptr);
+
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr);
+
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr);
+
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr);
+
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr);
+
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr);
+
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr);
+
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr);
+
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr);
+
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr);
+
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr);
+
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr);
+
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr);
+
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr);
+
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr);
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr);
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr);
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr);
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr);
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr);
+
+
+#endif /* BRW_STRUCTS_DUMP_H */
diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py
new file mode 100755
index 00000000000..6dba49ad911
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.py
@@ -0,0 +1,291 @@
+#!/usr/bin/env python
+'''
+Generates dumpers for the i965 state strucutures using pygccxml.
+
+Run as
+
+ PYTHONPATH=/path/to/pygccxml-1.0.0 python brw_structs_dump.py
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+
+copyright = '''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+ '''
+
+import os
+import sys
+import re
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True
+
+
+def vars_filter(variable):
+ name = variable.name
+ return not re.match('^pad\d*', name) and name != 'dword'
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+
+ def __init__(self, stream, instance = '', decl = None):
+ decl_visitor.decl_visitor_t.__init__(self)
+ self.stream = stream
+ self._instance = instance
+ self.decl = decl
+
+ def clone(self):
+ return decl_dumper_t(self.stream, self._instance, self.decl)
+
+ def visit_class(self):
+ class_ = self.decl
+ assert self.decl.class_type in ('struct', 'union')
+
+ for variable in class_.variables(recursive = False):
+ if vars_filter(variable):
+ dump_type(self.stream, self._instance + '.' + variable.name, variable.type)
+
+ def visit_enumeration(self):
+ if enums:
+ self.stream.write(' switch(%s) {\n' % ("(*ptr)" + self._instance,))
+ for name, value in self.decl.values:
+ self.stream.write(' case %s:\n' % (name,))
+ self.stream.write(' debug_printf("\\t\\t%s = %s\\n");\n' % (self._instance, name))
+ self.stream.write(' break;\n')
+ self.stream.write(' default:\n')
+ self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance))
+ self.stream.write(' break;\n')
+ self.stream.write(' }\n')
+ else:
+ self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance))
+
+
+def dump_decl(stream, instance, decl):
+ dumper = decl_dumper_t(stream, instance, decl)
+ algorithm.apply_visitor(dumper, decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+
+ def __init__(self, stream, instance, type_):
+ type_visitor.type_visitor_t.__init__(self)
+ self.stream = stream
+ self.instance = instance
+ self.type = type_
+
+ def clone(self):
+ return type_dumper_t(self.instance, self.type)
+
+ def visit_bool(self):
+ self.print_instance('%i')
+
+ def visit_char(self):
+ #self.print_instance('%i')
+ self.print_instance('0x%x')
+
+ def visit_unsigned_char(self):
+ #self.print_instance('%u')
+ self.print_instance('0x%x')
+
+ def visit_signed_char(self):
+ #self.print_instance('%i')
+ self.print_instance('0x%x')
+
+ def visit_wchar(self):
+ self.print_instance('0x%x')
+
+ def visit_short_int(self):
+ #self.print_instance('%i')
+ self.print_instance('0x%x')
+
+ def visit_short_unsigned_int(self):
+ #self.print_instance('%u')
+ self.print_instance('0x%x')
+
+ def visit_int(self):
+ #self.print_instance('%i')
+ self.print_instance('0x%x')
+
+ def visit_unsigned_int(self):
+ #self.print_instance('%u')
+ self.print_instance('0x%x')
+
+ def visit_long_int(self):
+ #self.print_instance('%li')
+ self.print_instance('0x%lx')
+
+ def visit_long_unsigned_int(self):
+ #self.print_instance('%lu')
+ self.print_instance('%0xlx')
+
+ def visit_long_long_int(self):
+ #self.print_instance('%lli')
+ self.print_instance('%0xllx')
+
+ def visit_long_long_unsigned_int(self):
+ #self.print_instance('%llu')
+ self.print_instance('0x%llx')
+
+ def visit_float(self):
+ self.print_instance('%f')
+
+ def visit_double(self):
+ self.print_instance('%f')
+
+ def visit_array(self):
+ for i in range(type_traits.array_size(self.type)):
+ dump_type(self.stream, self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+ def visit_pointer(self):
+ self.print_instance('%p')
+
+ def visit_declarated(self):
+ #stream.write('decl = %r\n' % self.type.decl_string)
+ decl = type_traits.remove_declarated(self.type)
+ dump_decl(self.stream, self.instance, decl)
+
+ def print_instance(self, format):
+ self.stream.write(' debug_printf("\\t\\t%s = %s\\n", %s);\n' % (self.instance, format, "(*ptr)" + self.instance))
+
+
+
+def dump_type(stream, instance, type_):
+ type_ = type_traits.remove_alias(type_)
+ visitor = type_dumper_t(stream, instance, type_)
+ algorithm.apply_visitor(visitor, type_)
+
+
+def dump_struct_interface(stream, class_, suffix = ';'):
+ name = class_.name
+ assert name.startswith('brw_');
+ name = name[:4] + 'dump_' + name[4:]
+ stream.write('void\n')
+ stream.write('%s(const struct %s *ptr)%s\n' % (name, class_.name, suffix))
+
+
+def dump_struct_implementation(stream, decls, class_):
+ dump_struct_interface(stream, class_, suffix = '')
+ stream.write('{\n')
+ dump_decl(stream, '', class_)
+ stream.write('}\n')
+ stream.write('\n')
+
+
+def dump_header(stream):
+ stream.write(copyright.strip() + '\n')
+ stream.write('\n')
+ stream.write('/**\n')
+ stream.write(' * @file\n')
+ stream.write(' * Dump i965 data structures.\n')
+ stream.write(' *\n')
+ stream.write(' * Generated automatically from brw_structs.h by brw_structs_dump.py.\n')
+ stream.write(' */\n')
+ stream.write('\n')
+
+
+def dump_interfaces(decls, global_ns, names):
+ stream = open('brw_structs_dump.h', 'wt')
+
+ dump_header(stream)
+
+ stream.write('#ifndef BRW_STRUCTS_DUMP_H\n')
+ stream.write('#define BRW_STRUCTS_DUMP_H\n')
+ stream.write('\n')
+
+ for name in names:
+ stream.write('struct %s;\n' % (name,))
+ stream.write('\n')
+
+ for name in names:
+ (class_,) = global_ns.classes(name = name)
+ dump_struct_interface(stream, class_)
+ stream.write('\n')
+ stream.write('\n')
+
+ stream.write('#endif /* BRW_STRUCTS_DUMP_H */\n')
+
+
+def dump_implementations(decls, global_ns, names):
+ stream = open('brw_structs_dump.c', 'wt')
+
+ dump_header(stream)
+
+ stream.write('#include "util/u_debug.h"\n')
+ stream.write('\n')
+ stream.write('#include "brw_types.h"\n')
+ stream.write('#include "brw_structs.h"\n')
+ stream.write('#include "brw_structs_dump.h"\n')
+ stream.write('\n')
+
+ for name in names:
+ (class_,) = global_ns.classes(name = name)
+ dump_struct_implementation(stream, decls, class_)
+
+
+def decl_filter(decl):
+ '''Filter the declarations we're interested in'''
+ name = decl.name
+ return name.startswith('brw_') and name not in ('brw_instruction',)
+
+
+def main():
+
+ config = parser.config_t(
+ include_paths = [
+ '../../include',
+ ],
+ compiler = 'gcc',
+ )
+
+ headers = [
+ 'brw_types.h',
+ 'brw_structs.h',
+ ]
+
+ decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+ global_ns = declarations.get_global_namespace(decls)
+
+ names = []
+ for class_ in global_ns.classes(decl_filter):
+ names.append(class_.name)
+ names.sort()
+
+ dump_interfaces(decls, global_ns, names)
+ dump_implementations(decls, global_ns, names)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
new file mode 100644
index 00000000000..464013e7c40
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -0,0 +1,95 @@
+
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+
+
+static GLboolean need_swtnl( struct brw_context *brw )
+{
+ const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
+
+ /* If we don't require strict OpenGL conformance, never
+ * use fallbacks. If we're forcing fallbacks, always
+ * use fallfacks.
+ */
+ if (brw->flags.no_swtnl)
+ return FALSE;
+
+ if (brw->flags.force_swtnl)
+ return TRUE;
+
+ /* Exceeding hw limits on number of VS inputs?
+ */
+ if (brw->curr.num_vertex_elements == 0 ||
+ brw->curr.num_vertex_elements >= BRW_VEP_MAX) {
+ return TRUE;
+ }
+
+ /* Position array with zero stride?
+ *
+ * XXX: position isn't always at zero...
+ * XXX: eliminate zero-stride arrays
+ */
+ {
+ int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index;
+
+ if (brw->curr.vertex_buffer[ve0_vb].stride == 0)
+ return TRUE;
+ }
+
+ /* XXX: short-circuit
+ */
+ return FALSE;
+
+ if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+ if (rast->poly_smooth)
+ return TRUE;
+
+ }
+
+ if (brw->reduced_primitive == PIPE_PRIM_LINES ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_LINE)))
+ {
+ /* BRW hardware will do AA lines, but they are non-conformant it
+ * seems. TBD whether we keep this fallback:
+ */
+ if (rast->line_smooth)
+ return TRUE;
+
+ /* XXX: was a fallback in mesa (gs doesn't get enough
+ * information to know when to reset stipple counter), but there
+ * must be a way around it.
+ */
+ if (rast->line_stipple_enable &&
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP ||
+ brw->primitive == PIPE_PRIM_LINE_STRIP))
+ return TRUE;
+ }
+
+
+ if (brw->reduced_primitive == PIPE_PRIM_POINTS ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_POINT ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_POINT)))
+ {
+ if (rast->point_smooth)
+ return TRUE;
+ }
+
+ /* BRW hardware doesn't handle CLAMP texturing correctly;
+ * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP
+ * as CLAMP_TO_EDGE instead. If we're using CLAMP, and
+ * we want strict conformance, force the fallback.
+ *
+ * XXX: need a workaround for this.
+ */
+
+ /* Nothing stopping us from the fast path now */
+ return FALSE;
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h
new file mode 100644
index 00000000000..89e08a5c804
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_types.h
@@ -0,0 +1,21 @@
+#ifndef BRW_TYPES_H
+#define BRW_TYPES_H
+
+#include "pipe/p_compiler.h"
+
+typedef uint32_t GLuint;
+typedef uint8_t GLubyte;
+typedef uint16_t GLushort;
+typedef int32_t GLint;
+typedef int8_t GLbyte;
+typedef int16_t GLshort;
+typedef float GLfloat;
+
+/* no GLenum, translate all away */
+
+typedef uint8_t GLboolean;
+
+#define GL_FALSE FALSE
+#define GL_TRUE TRUE
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
new file mode 100644
index 00000000000..907ec56c6ca
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -0,0 +1,263 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct urb_limits {
+ GLuint min_nr_entries;
+ GLuint preferred_nr_entries;
+ GLuint min_entry_size;
+ GLuint max_entry_size;
+} limits[CS+1] = {
+ { 16, 32, 1, 5 }, /* vs */
+ { 4, 8, 1, 5 }, /* gs */
+ { 5, 10, 1, 5 }, /* clp */
+ { 1, 8, 1, 12 }, /* sf */
+ { 1, 4, 1, 32 } /* cs */
+};
+
+
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+ brw->urb.vs_start = 0;
+ brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+ brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+ brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+ brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+ return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static int recalculate_urb_fence( struct brw_context *brw )
+{
+ GLuint csize = brw->curbe.total_size;
+ GLuint vsize = brw->vs.prog_data->urb_entry_size;
+ GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+ if (csize < limits[CS].min_entry_size)
+ csize = limits[CS].min_entry_size;
+
+ if (vsize < limits[VS].min_entry_size)
+ vsize = limits[VS].min_entry_size;
+
+ if (sfsize < limits[SF].min_entry_size)
+ sfsize = limits[SF].min_entry_size;
+
+ if (brw->urb.vsize < vsize ||
+ brw->urb.sfsize < sfsize ||
+ brw->urb.csize < csize ||
+ (brw->urb.constrained && (brw->urb.vsize > vsize ||
+ brw->urb.sfsize > sfsize ||
+ brw->urb.csize > csize))) {
+
+
+ brw->urb.csize = csize;
+ brw->urb.sfsize = sfsize;
+ brw->urb.vsize = vsize;
+
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+ brw->urb.constrained = 0;
+
+ if (BRW_IS_IGDNG(brw)) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (BRW_IS_G4X(brw)) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_MIN_URB) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+ brw->urb.constrained = 1;
+ }
+
+ if (!check_urb_layout(brw)) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+ /* Mark us as operating with constrained nr_entries, so that next
+ * time we recalculate we'll resize the fences in the hope of
+ * escaping constrained mode and getting back to normal performance.
+ */
+ brw->urb.constrained = 1;
+
+ if (!check_urb_layout(brw)) {
+ /* This is impossible, given the maximal sizes of urb
+ * entries and the values for minimum nr of entries
+ * provided above.
+ */
+ debug_printf("couldn't calculate URB layout!\n");
+ exit(1);
+ }
+
+ if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+ debug_printf("URB CONSTRAINED\n");
+ }
+
+done:
+ if (BRW_DEBUG & DEBUG_URB)
+ debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ brw->urb.vs_start,
+ brw->urb.gs_start,
+ brw->urb.clip_start,
+ brw->urb.sf_start,
+ brw->urb.cs_start,
+ URB_SIZES(brw));
+
+ brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+ }
+
+ return 0;
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CURBE_OFFSETS,
+ .cache = (CACHE_NEW_VS_PROG |
+ CACHE_NEW_SF_PROG)
+ },
+ .prepare = recalculate_urb_fence
+};
+
+
+
+
+
+int brw_upload_urb_fence(struct brw_context *brw)
+{
+ struct brw_urb_fence uf;
+ memset(&uf, 0, sizeof(uf));
+
+ uf.header.opcode = CMD_URB_FENCE;
+ uf.header.length = sizeof(uf)/4-2;
+ uf.header.vs_realloc = 1;
+ uf.header.gs_realloc = 1;
+ uf.header.clp_realloc = 1;
+ uf.header.sf_realloc = 1;
+ uf.header.vfe_realloc = 1;
+ uf.header.cs_realloc = 1;
+
+ /* The ordering below is correct, not the layout in the
+ * instruction.
+ *
+ * There are 256/384 urb reg pairs in total.
+ */
+ uf.bits0.vs_fence = brw->urb.gs_start;
+ uf.bits0.gs_fence = brw->urb.clip_start;
+ uf.bits0.clp_fence = brw->urb.sf_start;
+ uf.bits1.sf_fence = brw->urb.cs_start;
+ uf.bits1.cs_fence = URB_SIZES(brw);
+
+ BRW_BATCH_STRUCT(brw, &uf);
+ return 0;
+}
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
new file mode 100644
index 00000000000..458058d668d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -0,0 +1,38 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_util.h"
+#include "brw_defines.h"
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
new file mode 100644
index 00000000000..b5f9a36e7bc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "brw_types.h"
+
+extern GLuint brw_count_bits( GLuint val );
+extern GLuint brw_translate_blend_factor( unsigned factor );
+extern GLuint brw_translate_blend_equation( unsigned mode );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
new file mode 100644
index 00000000000..e3ea5a3a135
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -0,0 +1,131 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "tgsi/tgsi_dump.h"
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_pipe_rast.h"
+
+
+
+static enum pipe_error do_vs_prog( struct brw_context *brw,
+ struct brw_vertex_shader *vp,
+ struct brw_vs_prog_key *key,
+ struct brw_winsys_buffer **bo_out)
+{
+ enum pipe_error ret;
+ GLuint program_size;
+ const GLuint *program;
+ struct brw_vs_compile c;
+
+ memset(&c, 0, sizeof(c));
+ memcpy(&c.key, key, sizeof(*key));
+
+ brw_init_compile(brw, &c.func);
+ c.vp = vp;
+
+ c.prog_data.nr_outputs = vp->info.num_outputs;
+ c.prog_data.nr_inputs = vp->info.num_inputs;
+
+ if (1)
+ tgsi_dump(c.vp->tokens, 0);
+
+ /* Emit GEN4 code.
+ */
+ brw_vs_emit(&c);
+
+ /* get the program
+ */
+ ret = brw_get_program(&c.func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+ &c.key, brw_vs_prog_key_size(&c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->vs.prog_data,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
+{
+ struct brw_vs_prog_key key;
+ struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+ struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+ enum pipe_error ret;
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = vp->id;
+ key.nr_userclip = brw->curr.ucp.nr;
+
+ memcpy(&key.fs_signature, sig, brw_fs_signature_size(sig));
+
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache, BRW_VS_PROG,
+ &key, brw_vs_prog_key_size(&key),
+ NULL, 0,
+ &brw->vs.prog_data,
+ &brw->vs.prog_bo))
+ return PIPE_OK;
+
+ ret = do_vs_prog(brw, vp, &key, &brw->vs.prog_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+/* See brw_vs.c:
+ */
+const struct brw_tracked_state brw_vs_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_CLIP |
+ PIPE_NEW_RAST |
+ PIPE_NEW_FRAGMENT_SIGNATURE),
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = 0
+ },
+ .prepare = brw_upload_vs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
new file mode 100644
index 00000000000..944d88c84cc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -0,0 +1,106 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+struct brw_vs_prog_key {
+ GLuint program_string_id;
+ GLuint nr_userclip:4;
+ GLuint pad:26;
+ struct brw_fs_signature fs_signature;
+};
+
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+ brw_fs_signature_size(&(s)->fs_signature))
+
+
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+
+struct brw_vs_compile {
+ struct brw_compile func;
+ struct brw_vs_prog_key key;
+ struct brw_vs_prog_data prog_data;
+ struct brw_chipset chipset;
+
+ struct brw_vertex_shader *vp;
+
+ GLuint nr_inputs;
+ GLuint nr_outputs;
+ GLuint nr_immediates;
+ GLfloat immediate[128][4];
+
+ GLuint overflow_grf_start;
+ GLuint overflow_count;
+
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ struct brw_reg r0;
+ struct brw_reg r1;
+ struct brw_reg regs[TGSI_FILE_COUNT][128];
+ struct brw_reg tmp;
+ struct brw_reg stack;
+
+ struct {
+ GLboolean used_in_src;
+ struct brw_reg reg;
+ } output_regs[128];
+
+ struct brw_reg userplane[6];
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+
+ struct brw_instruction *if_inst[MAX_IF_DEPTH];
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+ GLuint insn;
+ GLuint if_depth;
+ GLuint loop_depth;
+ GLuint end_offset;
+
+ struct brw_indirect stack_index;
+};
+
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
new file mode 100644
index 00000000000..8a16205d2f6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -0,0 +1,1654 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+/* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+ int nr = reg + slot/2;
+ int subnr = (slot%2) * 4;
+
+ return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
+
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+static boolean is_position_output( struct brw_vs_compile *c,
+ unsigned vs_output )
+{
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+
+ return (semantic == TGSI_SEMANTIC_POSITION &&
+ index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+ unsigned vs_output,
+ unsigned *fs_input_slot )
+{
+ const struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+ unsigned i;
+
+ for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+ if (c->key.fs_signature.input[i].semantic == semantic &&
+ c->key.fs_signature.input[i].semantic_index == index) {
+ *fs_input_slot = i;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+ GLuint i, reg = 0, subreg = 0, mrf;
+ int attributes_in_vue;
+
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The later is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
+ else {
+ /* XXX: immediates can go elsewhere if necessary:
+ */
+ assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
+
+ c->vp->use_const_buffer = GL_FALSE;
+ }
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+ /* r0 -- reserved as usual
+ */
+ c->r0 = brw_vec8_grf(reg, 0);
+ reg++;
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ /* Skip over fixed planes: Or never read them into vs unit?
+ */
+ subreg += 6;
+
+ for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+ c->userplane[i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+
+ /* Deal with curbe alignment:
+ */
+ subreg = align(subreg, 2);
+ /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
+ }
+
+
+ /* Immediates: always in the curbe.
+ *
+ * XXX: Can try to encode some immediates as brw immediates
+ * XXX: Make sure ureg sets minimal immediate size and respect it
+ * here.
+ */
+ for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+ c->regs[TGSI_FILE_IMMEDIATE][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+ c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+ /* Vertex constant buffer.
+ *
+ * Constants from the buffer can be either cached in the curbe or
+ * loaded as needed from the actual constant buffer.
+ */
+ if (!c->vp->use_const_buffer) {
+ GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ for (i = 0; i < nr_params; i++, subreg++) {
+ c->regs[TGSI_FILE_CONSTANT][i] =
+ stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
+ }
+
+ c->prog_data.nr_params += nr_params * 4;
+ }
+
+ /* All regs allocated
+ */
+ reg += (subreg + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+
+
+ /* Allocate input regs:
+ */
+ c->nr_inputs = c->vp->info.num_inputs;
+ for (i = 0; i < c->nr_inputs; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
+
+
+
+ /* Allocate outputs. The non-position outputs go straight into message regs.
+ */
+ c->nr_outputs = c->prog_data.nr_outputs;
+
+ if (c->chipset.is_igdng)
+ mrf = 8;
+ else
+ mrf = 4;
+
+
+ if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+ c->overflow_grf_start = reg;
+ c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+ reg += c->overflow_count;
+ }
+
+ /* XXX: need to access vertex output semantics here:
+ */
+ for (i = 0; i < c->nr_outputs; i++) {
+ unsigned slot;
+
+ /* XXX: Put output position in slot zero always. Clipper, etc,
+ * need access to this reg.
+ */
+ if (is_position_output(c, i)) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
+ reg++;
+ }
+ else if (find_output_slot(c, i, &slot)) {
+
+ if (0 /* is_psize_output(c, i) */ ) {
+ /* c->psize_out.grf = reg; */
+ /* c->psize_out.mrf = i; */
+ }
+
+ /* The first (16-4) outputs can go straight into the message regs.
+ */
+ if (slot + mrf < BRW_MAX_MRF) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+ }
+ else {
+ int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+ }
+ }
+ else {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
+ }
+ }
+
+ /* Allocate program temporaries:
+ */
+
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* Address reg(s). Don't try to use the internal address reg until
+ * deref time.
+ */
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ reg,
+ 0,
+ BRW_REGISTER_TYPE_D,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XXXX,
+ BRW_WRITEMASK_X);
+ reg++;
+ }
+
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+#if 0
+ for (i = 0; i < 128; i++) {
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+#endif
+
+ if (c->vp->has_flow_control) {
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+ }
+
+ /* Some opcodes need an internal temporary:
+ */
+ c->first_tmp = reg;
+ c->last_tmp = reg; /* for allocation purposes */
+
+ /* Each input reg holds data from two vertices. The
+ * urb_read_length is the number of registers read from *each*
+ * vertex urb, so is half the amount:
+ */
+ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+
+ /* Setting this field to 0 leads to undefined behavior according to the
+ * the VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+ if (c->chipset.is_igdng)
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ else
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+ c->prog_data.total_grf = reg;
+
+ if (BRW_DEBUG & DEBUG_VS) {
+ debug_printf("%s NumAddrRegs %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_ADDRESS]+1);
+ debug_printf("%s NumTemps %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1);
+ debug_printf("%s reg = %d\n", __FUNCTION__, reg);
+ }
+}
+
+
+/**
+ * If an instruction uses a temp reg both as a src and the dest, we
+ * sometimes need to allocate an intermediate temporary.
+ */
+static void unalias1( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if (dst.file == arg0.file && dst.nr == arg0.nr) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0);
+ }
+}
+
+/**
+ * \sa unalias2
+ * Checkes if 2-operand instruction needs an intermediate temporary.
+ */
+static void unalias2( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1);
+ }
+}
+
+/**
+ * \sa unalias2
+ * Checkes if 3-operand instruction needs an intermediate temporary.
+ */
+static void unalias3( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr) ||
+ (dst.file == arg2.file && dst.nr == arg2.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1, arg2);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1, arg2);
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint cond)
+{
+ brw_MOV(p, dst, brw_imm_f(0.0f));
+ brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+ brw_MOV(p, dst, brw_imm_f(1.0f));
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+static void emit_seq( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+static void emit_slt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_max( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg1, arg0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+static void emit_min( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ GLuint precision)
+{
+ /* There are various odd behaviours with SEND on the simulator. In
+ * addition there are documented issues with the fact that the GEN4
+ * processor doesn't do dependency control properly on SEND
+ * results. So, on balance, this kludge to get around failures
+ * with writemasked math results looks like it might be necessary
+ * whether that turns out to be a simulator bug or not:
+ */
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+static void emit_math2( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_message_reg(3), arg1);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) {
+ struct brw_reg tmp = get_tmp(c);
+ struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+ /* tmp_d = floor(arg0.x) */
+ brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+ /* result[0] = 2.0 ^ tmp */
+
+ /* Adjust exponent for floating point:
+ * exp += 127
+ */
+ brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+ /* Install exponent and sign.
+ * Excess drops off the edge:
+ */
+ brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X),
+ tmp_d, brw_imm_d(23));
+
+ release_tmp(c, tmp);
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) {
+ /* result[1] = arg0.x - floor(arg0.x) */
+ brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
+ /* As with the LOG instruction, we might be better off just
+ * doing a taylor expansion here, seeing as we have to do all
+ * the prep work.
+ *
+ * If mathbox partial precision is too low, consider also:
+ * result[3] = result[0] * EXP(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_EXP,
+ brw_writemask(dst, BRW_WRITEMASK_Z),
+ brw_swizzle1(arg0, 0),
+ BRW_MATH_PRECISION_FULL);
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1));
+ }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp) {
+ tmp = get_tmp(c);
+ tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
+ * according to spec:
+ *
+ * These almost look likey they could be joined up, but not really
+ * practical:
+ *
+ * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
+ * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
+ */
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1U<<31)-1));
+
+ brw_SHR(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_X),
+ tmp_ud,
+ brw_imm_ud(23));
+
+ brw_ADD(p,
+ brw_writemask(tmp, BRW_WRITEMASK_X),
+ retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+ brw_imm_d(-127));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1<<23)-1));
+
+ brw_OR(p,
+ brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
+ tmp_ud,
+ brw_imm_ud(127<<23));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
+ /* result[2] = result[0] + LOG2(result[1]); */
+
+ /* Why bother? The above is just a hint how to do this with a
+ * taylor series. Maybe we *should* use a taylor series as by
+ * the time all the above has been done it's almost certainly
+ * quicker than calling the mathbox, even with low precision.
+ *
+ * Options are:
+ * - result[0] + mathbox.LOG2(result[1])
+ * - mathbox.LOG2(arg0.x)
+ * - result[0] + inline_taylor_approx(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_LOG,
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
+ brw_swizzle1(tmp, 1),
+ BRW_MATH_PRECISION_FULL);
+
+ brw_ADD(p,
+ brw_writemask(tmp, BRW_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(tmp, 0));
+ }
+
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1));
+ }
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1)
+{
+ struct brw_compile *p = &c->func;
+
+ /* There must be a better way to do this:
+ */
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_X)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0));
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y)
+ brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1);
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0);
+ if (dst.dw1.bits.writemask & BRW_WRITEMASK_W)
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1);
+}
+
+
+static void emit_xpd( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg t,
+ struct brw_reg u)
+{
+ brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
+ brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
+}
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1));
+
+ /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_8);
+ {
+ brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ emit_math2(c,
+ BRW_MATH_FUNCTION_POW,
+ brw_writemask(dst, BRW_WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(arg0, 3),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ brw_ENDIF(p, if_insn);
+
+ release_tmp(c, tmp);
+}
+
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, arg2);
+ brw_MAC(p, dst, arg0, arg1);
+}
+
+/** 3 or 4-component vector normalization */
+static void emit_nrm( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ int num_comps)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* tmp = dot(arg0, arg0) */
+ if (num_comps == 3)
+ brw_DP3(p, tmp, arg0, arg0);
+ else
+ brw_DP4(p, tmp, arg0, arg0);
+
+ /* tmp = 1 / sqrt(tmp) */
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+
+ /* dst = arg0 * tmp */
+ brw_MUL(p, dst, arg0, tmp);
+
+ release_tmp(c, tmp);
+}
+
+
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ GLuint argIndex,
+ GLuint index,
+ GLboolean relAddr)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != index || relAddr) {
+ struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
+
+ c->current_const[argIndex].index = index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src.Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (relAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (relAddr) {
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+ enum tgsi_file_type file,
+ GLuint index )
+{
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_CONSTANT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case TGSI_FILE_NULL: /* undef values */
+ return brw_null_reg();
+
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ GLint offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ /* NOTE: tmp not released */
+ return vec8(tmp);
+}
+
+
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ GLuint argIndex,
+ GLuint file,
+ GLint index,
+ GLboolean relAddr )
+{
+
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
+ case TGSI_FILE_IMMEDIATE:
+ return c->regs[file][index];
+
+ case TGSI_FILE_CONSTANT:
+ if (c->vp->use_const_buffer) {
+ return get_constant(c, argIndex, index, relAddr);
+ }
+ else if (relAddr) {
+ return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
+ }
+ else {
+ assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index];
+ }
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case TGSI_FILE_NULL:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+static void emit_arl( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
+
+ if (need_tmp)
+ release_tmp(c, tmp);
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+ const struct tgsi_full_src_register *src,
+ GLuint argIndex )
+{
+ struct brw_reg reg;
+
+ if (src->Register.File == TGSI_FILE_NULL)
+ return brw_null_reg();
+
+ reg = get_src_reg(c, argIndex,
+ src->Register.File,
+ src->Register.Index,
+ src->Register.Indirect);
+
+ /* Convert 3-bit swizzle to 2-bit.
+ */
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX,
+ src->Register.SwizzleY,
+ src->Register.SwizzleZ,
+ src->Register.SwizzleW);
+
+ reg.negate = src->Register.Negate ? 1 : 0;
+
+ /* XXX: abs, absneg
+ */
+
+ return reg;
+}
+
+
+/**
+ * Get brw register for the given program dest register.
+ */
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+ unsigned file,
+ unsigned index,
+ unsigned writemask )
+{
+ struct brw_reg reg;
+
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ reg = c->regs[file][index];
+ break;
+ case TGSI_FILE_ADDRESS:
+ assert(index == 0);
+ reg = c->regs[file][index];
+ break;
+ case TGSI_FILE_NULL:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
+
+ reg.dw1.bits.writemask = writemask;
+
+ return reg;
+}
+
+
+
+
+/**
+ * Post-vertex-program processing. Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg m0 = brw_message_reg(0);
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
+ struct brw_reg ndc;
+ int eot;
+ int i;
+ GLuint len_vertext_header = 2;
+
+ /* Build ndc coords */
+ ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
+ brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc);
+
+ /* Update the header for point size, user clipping flags, and -ve rhw
+ * workaround.
+ */
+ if (c->prog_data.writes_psiz ||
+ c->key.nr_userclip ||
+ c->chipset.is_965)
+ {
+ struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ GLuint i;
+
+ brw_MOV(p, header1, brw_imm_ud(0));
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ if (c->prog_data.writes_psiz) {
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ];
+ brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ }
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (c->chipset.is_965) {
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ release_tmp(c, header1);
+ }
+ else {
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ }
+
+ /* Emit the (interleaved) headers for the two vertices - an 8-reg
+ * of zeros followed by two sets of NDC coordinates:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, offset(m0, 2), ndc);
+
+ if (c->chipset.is_igdng) {
+ /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+ /* m4, m5 contain the distances from vertex to the user clip planeXXX.
+ * Seems it is useless for us.
+ * m6 is used for aligning, so that the remainder of vertex element is
+ * reg-aligned.
+ */
+ brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+ len_vertext_header = 6;
+ } else {
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertext_header = 2;
+ }
+
+ eot = (c->overflow_count == 0);
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 0, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+ 0, /* response len */
+ eot, /* eot */
+ eot, /* writes complete */
+ 0, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+ unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+ GLuint j;
+
+ eot = (i + nr >= c->overflow_count);
+
+ /* XXX I'm not 100% sure about which MRF regs to use here. Starting
+ * at mrf[4] atm...
+ */
+ for (j = 0; j < nr; j++) {
+ brw_MOV(p, brw_message_reg(4+j),
+ brw_vec8_grf(c->overflow_grf_start + i + j, 0));
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 4, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ nr+1, /* msg len */
+ 0, /* response len */
+ eot, /* eot */
+ eot, /* writes complete */
+ i-1, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+ }
+}
+
+
+/**
+ * Called after code generation to resolve subroutine calls and the
+ * END instruction.
+ * \param end_inst points to brw code for END instruction
+ * \param last_inst points to last instruction emitted before vertex write
+ */
+static void
+post_vs_emit( struct brw_vs_compile *c,
+ struct brw_instruction *end_inst,
+ struct brw_instruction *last_inst )
+{
+ GLint offset;
+
+ brw_resolve_cals(&c->func);
+
+ /* patch up the END code to jump past subroutines, etc */
+ offset = last_inst - end_inst;
+ if (offset > 1) {
+ brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ } else {
+ end_inst->header.opcode = BRW_OPCODE_NOP;
+ }
+}
+
+static uint32_t
+get_predicate(const struct tgsi_full_instruction *inst)
+{
+ /* XXX: disabling for now
+ */
+#if 0
+ if (inst->dst.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+/* assert(inst->dst.CondMask == COND_NE); */
+
+ switch (inst->dst.CondSwizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ debug_printf("Unexpected predicate: 0x%08x\n",
+ inst->dst.CondMask);
+ return BRW_PREDICATE_NORMAL;
+ }
+#else
+ return BRW_PREDICATE_NORMAL;
+#endif
+}
+
+static void emit_insn(struct brw_vs_compile *c,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned opcode = inst->Instruction.Opcode;
+ unsigned label = inst->Label.Label;
+ struct brw_compile *p = &c->func;
+ struct brw_reg args[3], dst;
+ GLuint i;
+
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* Get argument regs.
+ */
+ for (i = 0; i < 3; i++) {
+ args[i] = get_arg(c, &inst->Src[i], i);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ dst = get_dst(c,
+ inst->Dst[0].Register.File,
+ inst->Dst[0].Register.Index,
+ inst->Dst[0].Register.WriteMask);
+
+ /* XXX: saturate
+ */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
+ debug_printf("Unsupported saturate in vertex shader");
+ }
+
+ switch (opcode) {
+ case TGSI_OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case TGSI_OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_COS:
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_NRM:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case TGSI_OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
+ case TGSI_OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case TGSI_OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case TGSI_OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
+ case TGSI_OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MOV:
+ brw_MOV(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst,
+ brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SIN:
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case TGSI_OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(c->if_depth < MAX_IF_DEPTH);
+ c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ /* Note that brw_IF smashes the predicate_control field. */
+ c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst);
+ c->if_depth++;
+ break;
+ case TGSI_OPCODE_ELSE:
+ c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(c->if_depth > 0);
+ brw_ENDIF(p, c->if_inst[--c->if_depth]);
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ c->loop_depth--;
+
+ if (c->chipset.is_igdng)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > c->loop_inst[c->loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == TGSI_OPCODE_BRK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == TGSI_OPCODE_CONT) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ case TGSI_OPCODE_BRA:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(4));
+ brw_save_call(p, label, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_RET:
+ brw_ADD(p, get_addr_reg(c->stack_index),
+ get_addr_reg(c->stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ break;
+ case TGSI_OPCODE_END:
+ c->end_offset = p->nr_insn;
+ /* this instruction will get patched later to jump past subroutine
+ * code, etc.
+ */
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ brw_save_label(p, p->nr_insn, p->nr_insn);
+ break;
+ case TGSI_OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ default:
+ debug_printf("Unsupported opcode %i (%s) in vertex shader",
+ opcode,
+ tgsi_get_opcode_name(opcode));
+ }
+
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+#if 0
+ /* XXX: disabled
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+#endif
+
+ release_tmps(c);
+}
+
+
+/* Emit the vertex program instructions here.
+ */
+void brw_vs_emit(struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ const struct tgsi_token *tokens = c->vp->tokens;
+ struct brw_instruction *end_inst, *last_inst;
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *inst;
+
+ if (BRW_DEBUG & DEBUG_VS)
+ tgsi_dump(c->vp->tokens, 0);
+
+ c->stack_index = brw_indirect(0, 0);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+
+ /* Static register allocation
+ */
+ brw_vs_alloc_regs(c);
+
+ if (c->vp->has_flow_control) {
+ brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+ }
+
+ /* Instructions
+ */
+ tgsi_parse_init( &parse, tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ inst = &parse.FullToken.FullInstruction;
+ emit_insn( c, inst );
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+ tgsi_parse_free( &parse );
+
+ end_inst = &p->store[c->end_offset];
+ last_inst = &p->store[p->nr_insn];
+
+ /* The END instruction will be patched to jump to this code */
+ emit_vertex_write(c);
+
+ post_vs_emit(c, end_inst, last_inst);
+
+ if (BRW_DEBUG & DEBUG_VS) {
+ debug_printf("vs-native:\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
new file mode 100644
index 00000000000..dadbb622e4d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+
+#include "brw_debug.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+struct brw_vs_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
+};
+
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_VS_PROG */
+ key->total_grf = brw->vs.prog_data->total_grf;
+ key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_vs_entries;
+ key->urb_size = brw->urb.vsize;
+
+ /* BRW_NEW_NR_VS_SURFACES */
+ key->nr_surfaces = brw->vs.nr_surfaces;
+
+ /* PIPE_NEW_CLIP */
+ if (brw->curr.ucp.nr) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ key->curbe_offset = brw->curbe.clip_start;
+ }
+ else {
+ key->curbe_offset = brw->curbe.vs_start;
+ }
+}
+
+static enum pipe_error
+vs_unit_create_from_key(struct brw_context *brw,
+ struct brw_vs_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ enum pipe_error ret;
+ struct brw_vs_unit_state vs;
+ int chipset_max_threads;
+
+ memset(&vs, 0, sizeof(vs));
+
+ vs.thread0.kernel_start_pointer = 0; /* reloc */
+ vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Choosing multiple program flow means that we may get 2-vertex threads,
+ * which will have the channel mask for dwords 4-7 enabled in the thread,
+ * and those dwords will be written to the second URB handle when we
+ * brw_urb_WRITE() results.
+ */
+ vs.thread1.single_program_flow = 0;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ vs.thread3.dispatch_grf_start_reg = 1;
+ vs.thread3.urb_entry_read_offset = 0;
+ vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ else
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+
+ vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 72;
+ else if (BRW_IS_G4X(brw))
+ chipset_max_threads = 32;
+ else
+ chipset_max_threads = 16;
+
+ vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+ 1, chipset_max_threads) - 1;
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ vs.thread4.max_threads = 0;
+
+ /* No samplers for ARB_vp programs:
+ */
+ /* It has to be set to 0 for IGDNG
+ */
+ vs.vs5.sampler_count = 0;
+
+ if (BRW_DEBUG & DEBUG_STATS)
+ vs.thread4.stats_enable = 1;
+
+ /* Vertex program always enabled:
+ */
+ vs.vs6.vs_enable = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+ key, sizeof(*key),
+ reloc, 1,
+ &vs, sizeof(vs),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static int prepare_vs_unit(struct brw_context *brw)
+{
+ struct brw_vs_unit_key key;
+ enum pipe_error ret;
+ struct brw_winsys_reloc reloc[1];
+ unsigned grf_reg_count;
+
+ vs_unit_populate_key(brw, &key);
+
+ grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+ /* Emit VS program relocation */
+ make_reloc(&reloc[0],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_vs_unit_state, thread0),
+ brw->vs.prog_bo);
+
+
+ if (brw_search_cache(&brw->cache, BRW_VS_UNIT,
+ &key, sizeof(key),
+ reloc, 1,
+ NULL,
+ &brw->vs.state_bo))
+ return PIPE_OK;
+
+ ret = vs_unit_create_from_key(brw, &key, reloc, &brw->vs.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_vs_unit = {
+ .dirty = {
+ .mesa = (PIPE_NEW_CLIP),
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = prepare_vs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
new file mode 100644
index 00000000000..177a5170d23
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -0,0 +1,232 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_winsys.h"
+
+/* XXX: disabled true constant buffer functionality
+ */
+
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+#if 0
+static struct brw_winsys_buffer *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ /* XXX: true constant buffers
+ */
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_SHADER_CONSTANTS,
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ brw->sws->bo_subdata(const_buffer, 0, size, params->ParameterValues,
+ NULL, 0);
+
+ return const_buffer;
+}
+#endif
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ */
+#if 0
+static void
+brw_update_vs_constant_surface( struct brw_context *brw,
+ GLuint surf)
+{
+ struct brw_surface_key key;
+ struct pipe_buffer *cb = brw->curr.vs_constants;
+ enum pipe_error ret;
+
+ assert(surf == 0);
+
+ /* If we're in this state update atom, we need to update VS constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ ret = brw_vs_update_constant_buffer(brw, &vp->const_buffer);
+ if (ret)
+ return ret;
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (vp->const_buffer == NULL) {
+ bo_reference(brw->vs.surf_bo[surf], NULL);
+ return PIPE_OK;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.bo = vp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL,
+ &brw->vs.surf_bo[surf]))
+ return PIPE_OK;
+
+ ret = brw_create_constant_surface(brw, &key
+ &brw->vs.surf_bo[surf]);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+#endif
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static enum pipe_error
+brw_vs_get_binding_table(struct brw_context *brw,
+ struct brw_winsys_buffer **bo_out)
+{
+#if 0
+ static GLuint data[BRW_VS_MAX_SURF]; /* always zero */
+ struct brw_winsys_reloc reloc[BRW_VS_MAX_SURF];
+ int i;
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ make_reloc(&reloc[i],
+ BRW_USAGE_STATE,
+ 0,
+ i * 4,
+ brw->vs.surf_bo[i]);
+ }
+
+ ret = brw_cache_data( &brw->surface_cache,
+ BRW_SS_SURF_BIND,
+ NULL, 0,
+ reloc, nr_reloc,
+ data, sizeof data,
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ FREE(data);
+ return PIPE_OK;
+#else
+ return PIPE_OK;
+#endif
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static enum pipe_error prepare_vs_surfaces(struct brw_context *brw )
+{
+ enum pipe_error ret;
+
+#if 0
+ int i;
+ int nr_surfaces = 0;
+
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ nr_surfaces = i + 1;
+ }
+ }
+
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
+#endif
+
+ /* Note that we don't end up updating the bind_bo if we don't have a
+ * surface to be pointing at. This should be relatively harmless, as it
+ * just slightly increases our working set size.
+ */
+ if (brw->vs.nr_surfaces != 0) {
+ ret = brw_vs_get_binding_table(brw, &brw->vs.bind_bo);
+ if (ret)
+ return ret;
+ }
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (PIPE_NEW_VERTEX_CONSTANTS |
+ PIPE_NEW_VERTEX_SHADER),
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
new file mode 100644
index 00000000000..a242e31218a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -0,0 +1,309 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_WINSYS_H
+#define BRW_WINSYS_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_refcnt.h"
+
+struct brw_winsys;
+struct pipe_fence_handle;
+
+/* Not sure why the winsys needs this:
+ */
+#define BRW_BATCH_SIZE (32*1024)
+
+struct brw_winsys_screen;
+
+/* Need a tiny bit of information inside the abstract buffer struct:
+ */
+struct brw_winsys_buffer {
+ struct pipe_reference reference;
+ struct brw_winsys_screen *sws;
+ unsigned size;
+};
+
+
+/* Should be possible to validate usages above against buffer creation
+ * types, below:
+ */
+enum brw_buffer_type
+{
+ BRW_BUFFER_TYPE_TEXTURE,
+ BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */
+ BRW_BUFFER_TYPE_VERTEX,
+ BRW_BUFFER_TYPE_CURBE,
+ BRW_BUFFER_TYPE_QUERY,
+ BRW_BUFFER_TYPE_SHADER_CONSTANTS,
+ BRW_BUFFER_TYPE_SHADER_SCRATCH,
+ BRW_BUFFER_TYPE_BATCH,
+ BRW_BUFFER_TYPE_GENERAL_STATE,
+ BRW_BUFFER_TYPE_SURFACE_STATE,
+ BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */
+ BRW_BUFFER_TYPE_GENERIC, /* unknown */
+ BRW_BUFFER_TYPE_MAX /* Count of possible values */
+};
+
+
+/* Describe the usage of a particular buffer in a relocation. The DRM
+ * winsys will translate these back to GEM read/write domain flags.
+ */
+enum brw_buffer_usage {
+ BRW_USAGE_STATE, /* INSTRUCTION, 0 */
+ BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */
+ BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */
+ BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */
+ BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */
+ BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */
+ BRW_USAGE_SAMPLER, /* SAMPLER, 0 */
+ BRW_USAGE_VERTEX, /* VERTEX, 0 */
+ BRW_USAGE_SCRATCH, /* 0, 0 */
+ BRW_USAGE_MAX
+};
+
+enum brw_buffer_data_type {
+ BRW_DATA_GS_CC_VP,
+ BRW_DATA_GS_CC_UNIT,
+ BRW_DATA_GS_WM_PROG,
+ BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+ BRW_DATA_GS_SAMPLER,
+ BRW_DATA_GS_WM_UNIT,
+ BRW_DATA_GS_SF_PROG,
+ BRW_DATA_GS_SF_VP,
+ BRW_DATA_GS_SF_UNIT,
+ BRW_DATA_GS_VS_UNIT,
+ BRW_DATA_GS_VS_PROG,
+ BRW_DATA_GS_GS_UNIT,
+ BRW_DATA_GS_GS_PROG,
+ BRW_DATA_GS_CLIP_VP,
+ BRW_DATA_GS_CLIP_UNIT,
+ BRW_DATA_GS_CLIP_PROG,
+ BRW_DATA_SS_SURFACE,
+ BRW_DATA_SS_SURF_BIND,
+ BRW_DATA_CONSTANT_BUFFER,
+ BRW_DATA_BATCH_BUFFER,
+ BRW_DATA_OTHER,
+ BRW_DATA_MAX
+};
+
+
+/* Matches the i915_drm definitions:
+ */
+#define BRW_TILING_NONE 0
+#define BRW_TILING_X 1
+#define BRW_TILING_Y 2
+
+
+/* Relocations to be applied with subdata in a call to sws->bo_subdata, below.
+ *
+ * Effectively this encodes:
+ *
+ * (unsigned *)(subdata + offset) = bo->offset + delta
+ */
+struct brw_winsys_reloc {
+ enum brw_buffer_usage usage; /* debug only */
+ unsigned delta;
+ unsigned offset;
+ struct brw_winsys_buffer *bo;
+};
+
+static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
+ enum brw_buffer_usage usage,
+ unsigned delta,
+ unsigned offset,
+ struct brw_winsys_buffer *bo)
+{
+ reloc->usage = usage;
+ reloc->delta = delta;
+ reloc->offset = offset;
+ reloc->bo = bo; /* Note - note taking a reference yet */
+}
+
+
+
+struct brw_winsys_screen {
+
+
+ /**
+ * Buffer functions.
+ */
+
+ /*@{*/
+ /**
+ * Create a buffer.
+ */
+ enum pipe_error (*bo_alloc)(struct brw_winsys_screen *sws,
+ enum brw_buffer_type type,
+ unsigned size,
+ unsigned alignment,
+ struct brw_winsys_buffer **bo_out);
+
+ /* Destroy a buffer when our refcount goes to zero:
+ */
+ void (*bo_destroy)(struct brw_winsys_buffer *buffer);
+
+ /* delta -- added to b2->offset, and written into buffer
+ * offset -- location above value is written to within buffer
+ */
+ enum pipe_error (*bo_emit_reloc)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_usage usage,
+ unsigned delta,
+ unsigned offset,
+ struct brw_winsys_buffer *b2);
+
+ enum pipe_error (*bo_exec)(struct brw_winsys_buffer *buffer,
+ unsigned bytes_used);
+
+ enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_data_type data_type,
+ size_t offset,
+ size_t size,
+ const void *data,
+ const struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc );
+
+ boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
+ boolean (*bo_references)(struct brw_winsys_buffer *a,
+ struct brw_winsys_buffer *b);
+
+ /* XXX: couldn't this be handled by returning true/false on
+ * bo_emit_reloc?
+ */
+ enum pipe_error (*check_aperture_space)(struct brw_winsys_screen *iws,
+ struct brw_winsys_buffer **buffers,
+ unsigned count);
+
+ /**
+ * Map a buffer.
+ */
+ void *(*bo_map)(struct brw_winsys_buffer *buffer,
+ enum brw_buffer_data_type data_type,
+ unsigned offset,
+ unsigned length,
+ boolean write,
+ boolean discard,
+ boolean flush_explicit);
+
+ void (*bo_flush_range)(struct brw_winsys_buffer *buffer,
+ unsigned offset,
+ unsigned length);
+
+ /**
+ * Unmap a buffer.
+ */
+ void (*bo_unmap)(struct brw_winsys_buffer *buffer);
+ /*@}*/
+
+
+ /* Wait for buffer to go idle. Similar to map+unmap, but doesn't
+ * mark buffer contents as dirty.
+ */
+ void (*bo_wait_idle)(struct brw_winsys_buffer *buffer);
+
+ /**
+ * Destroy the winsys.
+ */
+ void (*destroy)(struct brw_winsys_screen *iws);
+};
+
+static INLINE void *
+bo_map_read(struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf)
+{
+ return sws->bo_map( buf,
+ BRW_DATA_OTHER,
+ 0, buf->size,
+ FALSE, FALSE, FALSE );
+}
+
+static INLINE void
+bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
+{
+ struct brw_winsys_buffer *old_buf = *ptr;
+
+ if (pipe_reference(&(*ptr)->reference, &buf->reference))
+ old_buf->sws->bo_destroy(old_buf);
+
+ *ptr = buf;
+}
+
+
+/**
+ * Create brw pipe_screen.
+ */
+struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id);
+
+/**
+ * Create a brw pipe_context.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen);
+
+/**
+ * Get the brw_winsys buffer backing the texture.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture;
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+ struct brw_winsys_buffer **buffer,
+ unsigned *stride);
+
+/**
+ * Wrap a brw_winsys buffer with a texture blanket.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture *
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+ const struct pipe_texture *template,
+ unsigned pitch,
+ unsigned tiling,
+ struct brw_winsys_buffer *buffer);
+
+
+/*************************************************************************
+ * Cooperative dumping between winsys and driver. TODO: make this
+ * driver-only by wrapping calls to winsys->bo_subdata().
+ */
+
+#ifdef DEBUG
+extern int BRW_DUMP;
+#else
+#define BRW_DUMP 0
+#endif
+
+#define DUMP_ASM 0x1
+#define DUMP_STATE 0x2
+#define DUMP_BATCH 0x4
+
+void brw_dump_data( unsigned pci_id,
+ enum brw_buffer_data_type data_type,
+ unsigned offset,
+ const void *data,
+ size_t size );
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
new file mode 100644
index 00000000000..f8f6a539bc9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -0,0 +1,87 @@
+#include "brw_winsys.h"
+#include "brw_disasm.h"
+#include "brw_structs_dump.h"
+#include "brw_structs.h"
+#include "intel_decode.h"
+
+
+void brw_dump_data( unsigned pci_id,
+ enum brw_buffer_data_type data_type,
+ unsigned offset,
+ const void *data,
+ size_t size )
+{
+ if (BRW_DUMP & DUMP_ASM) {
+ switch (data_type) {
+ case BRW_DATA_GS_WM_PROG:
+ case BRW_DATA_GS_SF_PROG:
+ case BRW_DATA_GS_VS_PROG:
+ case BRW_DATA_GS_GS_PROG:
+ case BRW_DATA_GS_CLIP_PROG:
+ brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (BRW_DUMP & DUMP_STATE) {
+ switch (data_type) {
+ case BRW_DATA_GS_CC_VP:
+ brw_dump_cc_viewport( data );
+ break;
+ case BRW_DATA_GS_CC_UNIT:
+ brw_dump_cc_unit_state( data );
+ break;
+ case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR:
+ brw_dump_sampler_default_color( data );
+ break;
+ case BRW_DATA_GS_SAMPLER:
+ brw_dump_sampler_state( data );
+ break;
+ case BRW_DATA_GS_WM_UNIT:
+ brw_dump_wm_unit_state( data );
+ break;
+ case BRW_DATA_GS_SF_VP:
+ brw_dump_sf_viewport( data );
+ break;
+ case BRW_DATA_GS_SF_UNIT:
+ brw_dump_sf_unit_state( data );
+ break;
+ case BRW_DATA_GS_VS_UNIT:
+ brw_dump_vs_unit_state( data );
+ break;
+ case BRW_DATA_GS_GS_UNIT:
+ brw_dump_gs_unit_state( data );
+ break;
+ case BRW_DATA_GS_CLIP_VP:
+ brw_dump_clipper_viewport( data );
+ break;
+ case BRW_DATA_GS_CLIP_UNIT:
+ brw_dump_clip_unit_state( data );
+ break;
+ case BRW_DATA_SS_SURFACE:
+ brw_dump_surface_state( data );
+ break;
+ case BRW_DATA_SS_SURF_BIND:
+ break;
+ case BRW_DATA_OTHER:
+ break;
+ case BRW_DATA_CONSTANT_BUFFER:
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (BRW_DUMP & DUMP_BATCH) {
+ switch (data_type) {
+ case BRW_DATA_BATCH_BUFFER:
+ intel_decode(data, size / 4, offset, pci_id);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
new file mode 100644
index 00000000000..fdf820a9aae
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -0,0 +1,319 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+
+/** Return number of src args for given instruction */
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+ switch (opcode) {
+ case WM_FRONTFACING:
+ case WM_PIXELXY:
+ return 0;
+ case WM_CINTERP:
+ case WM_WPOSXY:
+ case WM_DELTAXY:
+ return 1;
+ case WM_LINTERP:
+ case WM_PIXELW:
+ return 2;
+ case WM_FB_WRITE:
+ case WM_PINTERP:
+ return 3;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXD:
+ /* sampler arg is held as a field in the instruction, not in an
+ * actual register:
+ */
+ return tgsi_get_opcode_info(opcode)->num_src - 1;
+
+ default:
+ assert(opcode < MAX_OPCODE);
+ return tgsi_get_opcode_info(opcode)->num_src;
+ }
+}
+
+
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+ switch (opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_DST:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+
+/**
+ * Do GPU code generation for shaders without flow control. Shaders
+ * without flow control instructions can more readily be analysed for
+ * SSA-style optimizations.
+ */
+static void
+brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ /* Augment fragment program. Add instructions for pre- and
+ * post-fragment-program tasks such as interpolation and fogging.
+ */
+ brw_wm_pass_fp(c);
+
+ /* Translate to intermediate representation. Build register usage
+ * chains.
+ */
+ brw_wm_pass0(c);
+
+ /* Dead code removal.
+ */
+ brw_wm_pass1(c);
+
+ /* Register allocation.
+ * Divide by two because we operate on 16 pixels at a time and require
+ * two GRF entries for each logical shader register.
+ */
+ c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+ brw_wm_pass2(c);
+
+ /* how many general-purpose registers are used */
+ c->prog_data.total_grf = c->max_wm_grf;
+
+ /* Scratch space is used for register spilling */
+ if (c->last_scratch) {
+ c->prog_data.total_scratch = c->last_scratch + 0x40;
+ }
+ else {
+ c->prog_data.total_scratch = 0;
+ }
+
+ /* Emit GEN4 code.
+ */
+ brw_wm_emit(c);
+}
+
+
+/**
+ * All Mesa program -> GPU code generation goes through this function.
+ * Depending on the instructions used (i.e. flow control instructions)
+ * we'll use one of two code generators.
+ */
+static enum pipe_error do_wm_prog( struct brw_context *brw,
+ struct brw_fragment_shader *fp,
+ struct brw_wm_prog_key *key,
+ struct brw_winsys_buffer **bo_out)
+{
+ enum pipe_error ret;
+ struct brw_wm_compile *c;
+ const GLuint *program;
+ GLuint program_size;
+
+ if (brw->wm.compile_data == NULL) {
+ brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data));
+ if (!brw->wm.compile_data)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ c = brw->wm.compile_data;
+ memset(c, 0, sizeof *c);
+
+ c->key = *key;
+ c->fp = fp;
+ c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
+
+ brw_init_compile(brw, &c->func);
+
+ /*
+ * Shader which use GLSL features such as flow control are handled
+ * differently from "simple" shaders.
+ */
+ if (fp->has_flow_control) {
+ c->dispatch_width = 8;
+ /* XXX: GLSL support
+ */
+ exit(1);
+ /* brw_wm_branching_shader_emit(brw, c); */
+ }
+ else {
+ c->dispatch_width = 16;
+ brw_wm_linear_shader_emit(brw, c);
+ }
+
+ if (BRW_DEBUG & DEBUG_WM)
+ debug_printf("\n");
+
+ /* get the program
+ */
+ ret = brw_get_program(&c->func, &program, &program_size);
+ if (ret)
+ return ret;
+
+ ret = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ NULL, 0,
+ program, program_size,
+ &c->prog_data,
+ &brw->wm.prog_data,
+ bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+ struct brw_wm_prog_key *key )
+{
+ unsigned lookup, line_aa;
+ unsigned i;
+
+ memset(key, 0, sizeof(*key));
+
+ /* PIPE_NEW_FRAGMENT_SHADER
+ * PIPE_NEW_DEPTH_STENCIL_ALPHA
+ */
+ lookup = (brw->curr.zstencil->iz_lookup |
+ brw->curr.fragment_shader->iz_lookup);
+
+
+ /* PIPE_NEW_RAST
+ * BRW_NEW_REDUCED_PRIMITIVE
+ */
+ switch (brw->reduced_primitive) {
+ case PIPE_PRIM_POINTS:
+ line_aa = AA_NEVER;
+ break;
+ case PIPE_PRIM_LINES:
+ line_aa = (brw->curr.rast->templ.line_smooth ?
+ AA_ALWAYS : AA_NEVER);
+ break;
+ default:
+ line_aa = brw->curr.rast->unfilled_aa_line;
+ break;
+ }
+
+ brw_wm_lookup_iz(line_aa,
+ lookup,
+ brw->curr.fragment_shader->uses_depth,
+ key);
+
+ /* PIPE_NEW_RAST */
+ key->flat_shade = brw->curr.rast->templ.flatshade;
+
+
+ /* PIPE_NEW_BOUND_TEXTURES */
+ for (i = 0; i < brw->curr.num_textures; i++) {
+ const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
+
+ if (tex->base.format == PIPE_FORMAT_YCBCR)
+ key->yuvtex_mask |= 1 << i;
+
+ if (tex->base.format == PIPE_FORMAT_YCBCR_REV)
+ key->yuvtex_swap_mask |= 1 << i;
+
+ /* XXX: shadow texture
+ */
+ /* key->shadowtex_mask |= 1<<i; */
+ }
+
+ /* CACHE_NEW_VS_PROG */
+ key->vp_nr_outputs = brw->vs.prog_data->nr_outputs;
+
+ key->nr_cbufs = brw->curr.fb.nr_cbufs;
+
+ key->nr_inputs = brw->curr.fragment_shader->info.num_inputs;
+
+ /* The unique fragment program ID */
+ key->program_string_id = brw->curr.fragment_shader->id;
+}
+
+
+static enum pipe_error brw_prepare_wm_prog(struct brw_context *brw)
+{
+ struct brw_wm_prog_key key;
+ struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+ enum pipe_error ret;
+
+ brw_wm_populate_key(brw, &key);
+
+ /* Make an early check for the key.
+ */
+ if (brw_search_cache(&brw->cache, BRW_WM_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->wm.prog_data,
+ &brw->wm.prog_bo))
+ return PIPE_OK;
+
+ ret = do_wm_prog(brw, fs, &key, &brw->wm.prog_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+ PIPE_NEW_DEPTH_STENCIL_ALPHA |
+ PIPE_NEW_RAST |
+ PIPE_NEW_NR_CBUFS |
+ PIPE_NEW_BOUND_TEXTURES),
+ .brw = (BRW_NEW_WM_INPUT_DIMENSIONS |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG,
+ },
+ .prepare = brw_prepare_wm_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
new file mode 100644
index 00000000000..f1ca9f63696
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -0,0 +1,344 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define SATURATE (1<<5)
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will inserted before the main payload in the WM
+ * program execution. These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT 0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
+#define IZ_BIT_MAX 0x40
+
+#define AA_NEVER 0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS 2
+
+struct brw_wm_prog_key {
+ GLuint source_depth_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_depth_regs:3;
+ GLuint computes_depth:1;
+ GLuint source_depth_to_render_target:1;
+ GLuint flat_shade:1;
+ GLuint runtime_check_aads_emit:1;
+
+ GLuint shadowtex_mask:16;
+ GLuint yuvtex_mask:16;
+ GLuint yuvtex_swap_mask:16; /* UV swaped */
+
+ GLuint vp_nr_outputs:6;
+ GLuint nr_inputs:6;
+ GLuint nr_cbufs:3;
+ GLuint has_flow_control:1;
+
+ GLuint program_string_id;
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input. Values are
+ * constant, they are created once and are never modified. When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value. Wherever a value used is by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct. All references to a value occur after it
+ * is created. After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register. May be either
+ * empty or hold a value. Register allocation is the process of
+ * assigning values to grf registers. This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0. A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program.
+ */
+
+
+
+/* Used in pass2 to track register allocation.
+ */
+struct brw_wm_grf {
+ struct brw_wm_value *value;
+ GLuint nextuse;
+};
+
+struct brw_wm_value {
+ struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
+ struct brw_wm_ref *lastuse;
+ struct brw_wm_grf *resident;
+ GLuint contributes_to_output:1;
+ GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {
+ struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
+ struct brw_wm_value *value;
+ struct brw_wm_ref *prevuse;
+ GLuint unspill_reg:7; /* unspill to reg */
+ GLuint emitted:1;
+ GLuint insn:24;
+};
+
+struct brw_wm_instruction {
+ struct brw_wm_value *dst[4];
+ struct brw_wm_ref *src[3][4];
+ GLuint opcode:8;
+ GLuint saturate:1;
+ GLuint writemask:4;
+ GLuint sampler:4;
+ GLuint tex_unit:4; /* texture/sampler unit for texture instructions */
+ GLuint target:4; /* TGSI_TEXTURE_x for texture instructions,
+ * target binding table index for FB_WRITE
+ */
+ GLuint eot:1; /* End of thread indicator for FB_WRITE*/
+};
+
+
+#define BRW_WM_MAX_INSN 2048
+#define BRW_WM_MAX_GRF 128 /* hardware limit */
+#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
+#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
+#define BRW_WM_MAX_PARAM 256
+#define BRW_WM_MAX_CONST 256
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ */
+#define MAX_OPCODE TGSI_OPCODE_LAST
+#define WM_PIXELXY (MAX_OPCODE)
+#define WM_DELTAXY (MAX_OPCODE + 1)
+#define WM_PIXELW (MAX_OPCODE + 2)
+#define WM_LINTERP (MAX_OPCODE + 3)
+#define WM_PINTERP (MAX_OPCODE + 4)
+#define WM_CINTERP (MAX_OPCODE + 5)
+#define WM_WPOSXY (MAX_OPCODE + 6)
+#define WM_FB_WRITE (MAX_OPCODE + 7)
+#define WM_FRONTFACING (MAX_OPCODE + 8)
+#define MAX_WM_OPCODE (MAX_OPCODE + 9)
+
+#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT)
+#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+struct brw_fp_src {
+ unsigned file:4;
+ unsigned index:16;
+ unsigned swizzle:8;
+ unsigned indirect:1;
+ unsigned negate:1;
+ unsigned abs:1;
+};
+
+struct brw_fp_dst {
+ unsigned file:4;
+ unsigned index:16;
+ unsigned writemask:4;
+ unsigned indirect:1;
+ unsigned saturate:1;
+};
+
+struct brw_fp_instruction {
+ struct brw_fp_dst dst;
+ struct brw_fp_src src[3];
+ unsigned opcode:8;
+ unsigned target:8; /* XXX: special usage for FB_WRITE */
+ unsigned tex_unit:4;
+ unsigned sampler:4;
+ unsigned pad:8;
+};
+
+
+struct brw_wm_compile {
+ struct brw_compile func;
+ struct brw_wm_prog_key key;
+ struct brw_wm_prog_data prog_data;
+
+ struct brw_fragment_shader *fp;
+
+ GLfloat (*env_param)[4];
+
+ enum {
+ START,
+ PASS2_DONE
+ } state;
+
+ /* Initial pass - translate fp instructions to fp instructions,
+ * simplifying and adding instructions for interpolation and
+ * framebuffer writes.
+ */
+ struct {
+ GLfloat v[4];
+ unsigned nr;
+ } immediate[BRW_WM_MAX_CONST+3];
+ GLuint nr_immediates;
+
+ struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN];
+ GLuint nr_fp_insns;
+ GLuint fp_temp;
+ GLuint fp_interp_emitted;
+ GLuint fp_fragcolor_emitted;
+ GLuint fp_first_internal_temp;
+
+ struct brw_fp_src fp_pixel_xy;
+ struct brw_fp_src fp_delta_xy;
+ struct brw_fp_src fp_pixel_w;
+
+
+ /* Subsequent passes using SSA representation:
+ */
+ struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ GLuint nr_vreg;
+
+ struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+ GLuint nr_creg;
+
+ struct {
+ struct brw_wm_value depth[4]; /* includes r0/r1 */
+ struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS];
+ } payload;
+
+
+ const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4];
+
+ struct brw_wm_ref undef_ref;
+ struct brw_wm_value undef_value;
+
+ struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ GLuint nr_refs;
+
+ struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ GLuint nr_insns;
+
+ struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+ GLuint grf_limit;
+ GLuint max_wm_grf;
+ GLuint last_scratch;
+
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
+ /** Mapping from Mesa registers to hardware registers */
+ struct {
+ GLboolean inited;
+ struct brw_reg reg;
+ } wm_regs[BRW_FILE_PAYLOAD+1][256][4];
+
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
+ struct brw_reg stack;
+ struct brw_reg emit_mask_reg;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
+ GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+ GLuint dispatch_width;
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+
+ GLuint error;
+};
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+
+int brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth,
+ struct brw_wm_prog_key *key );
+
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
new file mode 100644
index 00000000000..6434c6acf73
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -0,0 +1,165 @@
+/* XXX: Constant buffers disabled
+ */
+
+
+/**
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+enum pipe_error
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key,
+ struct brw_winsys_buffer **bo_out )
+{
+ const GLint w = key->width - 1;
+ struct brw_winsys_buffer *bo;
+ struct brw_winsys_reloc reloc[1];
+ enum pipe_error ret;
+
+ /* Emit relocation to surface contents */
+ make_reloc(&reloc[0],
+ BRW_USAGE_SAMPLER,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ key->bo);
+
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ surf.ss1.base_addr = 0; /* reloc */
+
+ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
+ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
+ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+ ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+ key, sizeof(*key),
+ reloc, Elements(reloc),
+ &surf, sizeof(surf),
+ NULL, NULL,
+ &bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static enum pipe_error
+brw_update_wm_constant_surface( struct brw_context *brw,
+ GLuint surf)
+{
+ struct brw_surface_key key;
+ struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+ struct pipe_buffer *cbuf = brw->curr.fragment_constants;
+ int pitch = cbuf->size / (4 * sizeof(float));
+ enum pipe_error ret;
+
+ /* If we're in this state update atom, we need to update WM constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ ret = brw_wm_update_constant_buffer(brw, &fp->const_buffer);
+ if (ret)
+ return ret;
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (cbuf == NULL) {
+ bo_reference(&brw->wm.surf_bo[surf], NULL);
+ return PIPE_OK;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ key.ss0.surface_type = BRW_SURFACE_BUFFER;
+ key.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ key.bo = brw_buffer(cbuf)->bo;
+
+ key.ss2.width = (pitch-1) & 0x7f; /* bits 6:0 of size or width */
+ key.ss2.height = ((pitch-1) >> 7) & 0x1fff; /* bits 19:7 of size or width */
+ key.ss3.depth = ((pitch-1) >> 20) & 0x7f; /* bits 26:20 of size or width */
+ key.ss3.pitch = (pitch * 4 * sizeof(float)) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, 1,
+ NULL,
+ &brw->wm.surf_bo[surf]))
+ return PIPE_OK;
+
+ ret = brw_create_constant_surface(brw, &key, &brw->wm.surf_bo[surf]);
+ if (ret)
+ return ret;
+
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ return PIPE_OK;
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static enum pipe_error prepare_wm_constant_surface(struct brw_context *brw )
+{
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+ ret = brw_wm_update_constant_buffer(brw,
+ &fp->const_buffer);
+ if (ret)
+ return ret;
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ if (brw->wm.surf_bo[surf] != NULL) {
+ bo_reference(&brw->wm.surf_bo[surf], NULL);
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ }
+ return PIPE_OK;
+ }
+
+ ret = brw_update_wm_constant_surface(ctx, surf);
+ if (ret)
+ return ret;
+
+ return PIPE_OK
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
new file mode 100644
index 00000000000..3d11fa074cc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -0,0 +1,256 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+static void print_writemask( unsigned writemask )
+{
+ if (writemask != BRW_WRITEMASK_XYZW)
+ debug_printf(".%s%s%s%s",
+ (writemask & BRW_WRITEMASK_X) ? "x" : "",
+ (writemask & BRW_WRITEMASK_Y) ? "y" : "",
+ (writemask & BRW_WRITEMASK_Z) ? "z" : "",
+ (writemask & BRW_WRITEMASK_W) ? "w" : "");
+}
+
+static void print_swizzle( unsigned swizzle )
+{
+ char *swz = "xyzw";
+ if (swizzle != BRW_SWIZZLE_XYZW)
+ debug_printf(".%c%c%c%c",
+ swz[BRW_GET_SWZ(swizzle, X)],
+ swz[BRW_GET_SWZ(swizzle, Y)],
+ swz[BRW_GET_SWZ(swizzle, Z)],
+ swz[BRW_GET_SWZ(swizzle, W)]);
+}
+
+static void print_opcode( unsigned opcode )
+{
+ switch (opcode) {
+ case WM_PIXELXY:
+ debug_printf("PIXELXY");
+ break;
+ case WM_DELTAXY:
+ debug_printf("DELTAXY");
+ break;
+ case WM_PIXELW:
+ debug_printf("PIXELW");
+ break;
+ case WM_WPOSXY:
+ debug_printf("WPOSXY");
+ break;
+ case WM_PINTERP:
+ debug_printf("PINTERP");
+ break;
+ case WM_LINTERP:
+ debug_printf("LINTERP");
+ break;
+ case WM_CINTERP:
+ debug_printf("CINTERP");
+ break;
+ case WM_FB_WRITE:
+ debug_printf("FB_WRITE");
+ break;
+ case WM_FRONTFACING:
+ debug_printf("FRONTFACING");
+ break;
+ default:
+ debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic);
+ break;
+ }
+}
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value )
+{
+ assert(value);
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(value->hw_reg);
+ else if( value == &c->undef_value )
+ debug_printf("undef");
+ else if( value - c->vreg >= 0 &&
+ value - c->vreg < BRW_WM_MAX_VREG)
+ debug_printf("r%d", value - c->vreg);
+ else if (value - c->creg >= 0 &&
+ value - c->creg < BRW_WM_MAX_PARAM)
+ debug_printf("c%d", value - c->creg);
+ else if (value - c->payload.input_interp >= 0 &&
+ value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS)
+ debug_printf("i%d", value - c->payload.input_interp);
+ else if (value - c->payload.depth >= 0 &&
+ value - c->payload.depth < PIPE_MAX_SHADER_INPUTS)
+ debug_printf("d%d", value - c->payload.depth);
+ else
+ debug_printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref )
+{
+ struct brw_reg hw_reg = ref->hw_reg;
+
+ if (ref->unspill_reg)
+ debug_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(ref->hw_reg);
+ else {
+ debug_printf("%s", hw_reg.negate ? "-" : "");
+ debug_printf("%s", hw_reg.abs ? "abs/" : "");
+ brw_wm_print_value(c, ref->value);
+ if ((hw_reg.nr&1) || hw_reg.subnr) {
+ debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+ }
+ }
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst )
+{
+ GLuint i, arg;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+ debug_printf("[");
+ for (i = 0; i < 4; i++) {
+ if (inst->dst[i]) {
+ brw_wm_print_value(c, inst->dst[i]);
+ if (inst->dst[i]->spill_slot)
+ debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+ }
+ else
+ debug_printf("#");
+ if (i < 3)
+ debug_printf(",");
+ }
+ debug_printf("]");
+ print_writemask(inst->writemask);
+
+ debug_printf(" = ");
+ print_opcode(inst->opcode);
+
+ if (inst->saturate)
+ debug_printf("_SAT");
+
+ for (arg = 0; arg < nr_args; arg++) {
+
+ debug_printf(" [");
+
+ for (i = 0; i < 4; i++) {
+ if (inst->src[arg][i]) {
+ brw_wm_print_ref(c, inst->src[arg][i]);
+ }
+ else
+ debug_printf("%%");
+
+ if (i < 3)
+ debug_printf(",");
+ else
+ debug_printf("]");
+ }
+ }
+ debug_printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ GLuint insn;
+
+ debug_printf("%s:\n", stage);
+ for (insn = 0; insn < c->nr_insns; insn++)
+ brw_wm_print_insn(c, &c->instruction[insn]);
+ debug_printf("\n");
+}
+
+static const char *file_strings[TGSI_FILE_COUNT+1] = {
+ "NULL",
+ "CONST",
+ "IN",
+ "OUT",
+ "TEMP",
+ "SAMPLER",
+ "ADDR",
+ "IMM",
+ "LOOP",
+ "PAYLOAD"
+};
+
+static void brw_wm_print_fp_insn( struct brw_wm_compile *c,
+ struct brw_fp_instruction *inst )
+{
+ GLuint i;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+ print_opcode(inst->opcode);
+ if (inst->dst.saturate)
+ debug_printf("_SAT");
+ debug_printf(" ");
+
+ if (inst->dst.indirect)
+ debug_printf("[");
+
+ debug_printf("%s[%d]",
+ file_strings[inst->dst.file],
+ inst->dst.index );
+ print_writemask(inst->dst.writemask);
+
+ if (inst->dst.indirect)
+ debug_printf("]");
+
+ debug_printf(nr_args ? ", " : "\n");
+
+ for (i = 0; i < nr_args; i++) {
+ debug_printf("%s%s%s[%d]%s",
+ inst->src[i].negate ? "-" : "",
+ inst->src[i].abs ? "ABS(" : "",
+ file_strings[inst->src[i].file],
+ inst->src[i].index,
+ inst->src[i].abs ? ")" : "");
+ print_swizzle(inst->src[i].swizzle);
+ debug_printf("%s", i == nr_args - 1 ? "\n" : ", ");
+ }
+}
+
+
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ GLuint insn;
+
+ debug_printf("%s:\n", stage);
+ for (insn = 0; insn < c->nr_fp_insns; insn++)
+ brw_wm_print_fp_insn(c, &c->fp_instructions[insn]);
+ debug_printf("\n");
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
new file mode 100644
index 00000000000..8f983a60ae8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -0,0 +1,1521 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "tgsi/tgsi_info.h"
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_disasm.h"
+
+/* Not quite sure how correct this is - need to understand horiz
+ * vs. vertical strides a little better.
+ */
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
+ * corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- quad 0 x,y coords (2 packed uwords)
+ * R1.3 -- quad 1 x,y coords (2 packed uwords)
+ * R1.4 -- quad 2 x,y coords (2 packed uwords)
+ * R1.5 -- quad 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+
+static void emit_pixel_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & BRW_WRITEMASK_X) {
+ brw_ADD(p,
+ vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_ADD(p,
+ vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw,5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+}
+
+
+
+static void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & BRW_WRITEMASK_X) {
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_compile *p = &c->func;
+
+ if (mask & BRW_WRITEMASK_X) {
+ /* X' = X */
+ brw_MOV(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_W));
+ }
+
+ /* XXX: is this needed any more, or is this a NOOP?
+ */
+ if (mask & BRW_WRITEMASK_Y) {
+#if 0
+ /* Y' = height - 1 - Y */
+ brw_ADD(p,
+ dst[1],
+ negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_imm_d(c->key.drawable_height - 1));
+#else
+ brw_MOV(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_W));
+#endif
+ }
+}
+
+
+static void emit_pixel_w( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
+{
+ /* Don't need this if all you are doing is interpolating color, for
+ * instance.
+ */
+ if (mask & BRW_WRITEMASK_W) {
+ struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+ /* Calc w */
+ brw_math_16( p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+
+
+static void emit_linterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+}
+
+
+static void emit_pinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], dst[i], w[3]);
+ }
+ }
+}
+
+
+static void emit_cinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
+ }
+ }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+static void emit_frontfacing( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask )
+{
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ GLuint i;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0.0));
+ }
+ }
+
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ }
+ }
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (q0.tr - q0.tl) (q0.tl - q0.bl)
+ * (q0.tr - q0.tl) (q0.tr - q0.br)
+ * (q0.br - q0.bl) (q0.tl - q0.bl)
+ * (q0.br - q0.bl) (q0.tr - q0.br)
+ * (q1.tr - q1.tl) (q1.tl - q1.bl)
+ * (q1.tr - q1.tl) (q1.tr - q1.br)
+ * (q1.br - q1.bl) (q1.tl - q1.bl)
+ * (q1.br - q1.bl) (q1.tr - q1.br)
+ *
+ * and add two more quads if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_alu1( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_alu2( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i], arg1[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_mad( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[i], dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_trunc( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_RNDZ(p, dst[i], arg0[i]);
+ }
+ }
+}
+
+static void emit_lrp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ /* Uses dst as a temporary:
+ */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ /* Can I use the LINE instruction for this?
+ */
+ brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_slt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
+
+static void emit_sle( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
+
+static void emit_sge( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+
+static void emit_seq( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+
+static void emit_sne( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
+
+static void emit_cmp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_max( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_min( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+
+static void emit_dp3( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dp4( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dph( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_xpd( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W);
+
+ for (i = 0 ; i < 3; i++) {
+ if (mask & (1<<i)) {
+ GLuint i2 = (i+2)%3;
+ GLuint i1 = (i+1)%3;
+
+ brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+
+static void emit_math1( struct brw_compile *p,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_math_16(p,
+ dst[dst_chan],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+}
+
+
+static void emit_math2( struct brw_compile *p,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+
+ if (!(mask & BRW_WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+static void emit_tex( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *coord,
+ GLuint sampler)
+{
+ struct brw_compile *p = &c->func;
+ GLuint msgLength, responseLength;
+ GLuint i, nr;
+ GLuint emit;
+ GLuint msg_type;
+ GLboolean shadow = FALSE;
+
+ /* How many input regs are there?
+ */
+ switch (inst->target) {
+ case TGSI_TEXTURE_1D:
+ emit = BRW_WRITEMASK_X;
+ nr = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ emit = BRW_WRITEMASK_XW;
+ nr = 4;
+ shadow = TRUE;
+ break;
+ case TGSI_TEXTURE_2D:
+ emit = BRW_WRITEMASK_XY;
+ nr = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ emit = BRW_WRITEMASK_XYW;
+ nr = 4;
+ shadow = TRUE;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ emit = BRW_WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ default:
+ /* unexpected target */
+ abort();
+ }
+
+ msgLength = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const GLuint swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
+ msgLength += 2;
+ }
+
+ responseLength = 8; /* always */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ } else {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ BTI_TEXTURE(inst->tex_unit),
+ sampler, /* sampler index */
+ inst->writemask,
+ msg_type,
+ responseLength,
+ msgLength,
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
+}
+
+
+static void emit_txb( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *coord,
+ GLuint sampler )
+{
+ struct brw_compile *p = &c->func;
+ GLuint msgLength;
+ GLuint msg_type;
+ /* Shadow ignored for txb.
+ */
+ switch (inst->target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ brw_MOV(p, brw_message_reg(2), coord[0]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ brw_MOV(p, brw_message_reg(2), coord[0]);
+ brw_MOV(p, brw_message_reg(4), coord[1]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ brw_MOV(p, brw_message_reg(2), coord[0]);
+ brw_MOV(p, brw_message_reg(4), coord[1]);
+ brw_MOV(p, brw_message_reg(6), coord[2]);
+ break;
+ default:
+ /* unexpected target */
+ abort();
+ }
+
+ brw_MOV(p, brw_message_reg(8), coord[3]);
+ msgLength = 9;
+
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ BTI_TEXTURE(inst->tex_unit),
+ sampler, /* sampler index */
+ inst->writemask,
+ msg_type,
+ 8, /* responseLength */
+ msgLength,
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
+}
+
+
+static void emit_lit( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ assert((mask & BRW_WRITEMASK_XW) == 0);
+
+ if (mask & BRW_WRITEMASK_Y) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[1], arg0[0]);
+ brw_set_saturate(p, 0);
+ }
+
+ if (mask & BRW_WRITEMASK_Z) {
+ emit_math2(p, BRW_MATH_FUNCTION_POW,
+ &dst[2],
+ BRW_WRITEMASK_X | (mask & SATURATE),
+ &arg0[1],
+ &arg0[3]);
+ }
+
+ /* Ordinarily you'd use an iff statement to skip or shortcircuit
+ * some of the POW calculations above, but 16-wide iff statements
+ * seem to lock c1 hardware, so this is a nasty workaround:
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+ {
+ if (mask & BRW_WRITEMASK_Y)
+ brw_MOV(p, dst[1], brw_imm_f(0));
+
+ if (mask & BRW_WRITEMASK_Z)
+ brw_MOV(p, dst[2], brw_imm_f(0));
+ }
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+ struct brw_reg *arg0)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ GLuint i;
+
+ /* XXX - usually won't need 4 compares!
+ */
+ for (i = 0; i < 4; i++) {
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_pop_insn_state(p);
+ }
+}
+
+/* KILLP kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_killp( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ GLuint base_reg,
+ GLuint nr,
+ GLuint target,
+ GLuint eot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Pass through control information:
+ */
+/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
+ brw_fb_WRITE(p,
+ retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ target,
+ nr,
+ 0,
+ eot);
+}
+
+
+static void emit_aa( struct brw_wm_compile *c,
+ struct brw_reg *arg1,
+ GLuint reg )
+{
+ struct brw_compile *p = &c->func;
+ GLuint comp = c->key.aa_dest_stencil_reg / 2;
+ GLuint off = c->key.aa_dest_stencil_reg % 2;
+ struct brw_reg aa = offset(arg1[comp], off);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+ brw_MOV(p, brw_message_reg(reg), aa);
+ brw_pop_insn_state(p);
+}
+
+
+/* Post-fragment-program processing. Send the results to the
+ * framebuffer.
+ * \param arg0 the fragment color
+ * \param arg1 the pass-through depth value
+ * \param arg2 the shader-computed depth value
+ */
+static void emit_fb_write( struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
+{
+ struct brw_compile *p = &c->func;
+ GLuint nr = 2;
+ GLuint channel;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ /* I don't really understand how this achieves the color interleave
+ * (ie RGBARGBA) in the result: [Do the saturation here]
+ */
+ {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel),
+ arg0[channel]);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+
+ brw_pop_insn_state(p);
+ }
+
+ if (c->key.source_depth_to_render_target)
+ {
+ if (c->key.computes_depth)
+ brw_MOV(p, brw_message_reg(nr), arg2[2]);
+ else
+ brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg)
+ {
+ GLuint comp = c->key.dest_depth_reg / 2;
+ GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_pop_insn_state(p);
+ }
+ else {
+ brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+ }
+ nr += 2;
+ }
+
+ if (!c->key.runtime_check_aads_emit) {
+ if (c->key.aa_dest_stencil_reg)
+ emit_aa(c, arg1, 2);
+
+ fire_fb_write(c, 0, nr, target, eot);
+ }
+ else {
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_instruction *jmp;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p,
+ v1_null_ud,
+ get_element_ud(brw_vec8_grf(1,0), 6),
+ brw_imm_ud(1<<26));
+
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ emit_aa(c, arg1, 2);
+ fire_fb_write(c, 0, nr, target, eot);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* ELSE: Shuffle up one register to fill in the hole left for AA:
+ */
+ fire_fb_write(c, 1, nr-1, target, eot);
+ }
+}
+
+
+/**
+ * Move a GPR to scratch memory.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /*
+ mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
+ */
+ brw_MOV(p, brw_message_reg(2), reg);
+
+ /*
+ mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
+ send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
+ */
+ brw_dp_WRITE_16(p,
+ retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Load a GPR from scratch memory.
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Slot 0 is the undef value.
+ */
+ if (slot == 0) {
+ brw_MOV(p, reg, brw_imm_f(0));
+ return;
+ }
+
+ /*
+ mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
+ send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
+ */
+
+ brw_dp_READ_16(p,
+ retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Retrieve up to 4 GEN4 register pairs for the given wm reg:
+ * Args with unspill_reg != 0 will be loaded from scratch memory.
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+ struct brw_wm_ref *arg[],
+ struct brw_reg *regs )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (arg[i]) {
+ if (arg[i]->unspill_reg)
+ emit_unspill(c,
+ brw_vec8_grf(arg[i]->unspill_reg, 0),
+ arg[i]->value->spill_slot);
+
+ regs[i] = arg[i]->hw_reg;
+ }
+ else {
+ regs[i] = brw_null_reg();
+ }
+ }
+}
+
+
+/**
+ * For values that have a spill_slot!=0, write those regs to scratch memory.
+ */
+static void spill_values( struct brw_wm_compile *c,
+ struct brw_wm_value *values,
+ GLuint nr )
+{
+ GLuint i;
+
+ for (i = 0; i < nr; i++)
+ if (values[i].spill_slot)
+ emit_spill(c, values[i].hw_reg, values[i].spill_slot);
+}
+
+
+/* Emit the fragment program instructions here.
+ */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ GLuint insn;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+ /* Check if any of the payload regs need to be spilled:
+ */
+ spill_values(c, c->payload.depth, 4);
+ spill_values(c, c->creg, c->nr_creg);
+ spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
+
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ struct brw_reg args[3][4], dst[4];
+ GLuint i, dst_flags;
+
+ /* Get argument regs:
+ */
+ for (i = 0; i < 3; i++)
+ get_argument_regs(c, inst->src[i], args[i]);
+
+ /* Get dest regs:
+ */
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ dst[i] = inst->dst[i]->hw_reg;
+ else
+ dst[i] = brw_null_reg();
+
+ /* Flags
+ */
+ dst_flags = inst->writemask;
+ if (inst->saturate)
+ dst_flags |= SATURATE;
+
+ switch (inst->opcode) {
+ /* Generated instructions for calculating triangle interpolants:
+ */
+ case WM_PIXELXY:
+ emit_pixel_xy(p, dst, dst_flags);
+ break;
+
+ case WM_DELTAXY:
+ emit_delta_xy(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_WPOSXY:
+ emit_wpos_xy(c, dst, dst_flags, args[0]);
+ break;
+
+ case WM_PIXELW:
+ emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_LINTERP:
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_PINTERP:
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case WM_CINTERP:
+ emit_cinterp(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_FB_WRITE:
+ emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
+ break;
+
+ case WM_FRONTFACING:
+ emit_frontfacing(p, dst, dst_flags);
+ break;
+
+ /* Straightforward arithmetic:
+ */
+ case TGSI_OPCODE_ADD:
+ emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_FRC:
+ emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_FLR:
+ emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case TGSI_OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
+ case TGSI_OPCODE_DP3:
+ emit_dp3(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_DP4:
+ emit_dp4(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ emit_dph(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ emit_trunc(p, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_LRP:
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MAD:
+ emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MOV:
+ emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Higher math functions:
+ */
+ case TGSI_OPCODE_RCP:
+ emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_SIN:
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_COS:
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_EX2:
+ emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_LG2:
+ emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ /* There is an scs math function, but it would need some
+ * fixup for 16-element execution.
+ */
+ if (dst_flags & BRW_WRITEMASK_X)
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
+ if (dst_flags & BRW_WRITEMASK_Y)
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
+ break;
+
+ case TGSI_OPCODE_POW:
+ emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Comparisons:
+ */
+ case TGSI_OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ emit_lit(p, dst, dst_flags, args[0]);
+ break;
+
+ /* Texturing operations:
+ */
+ case TGSI_OPCODE_TEX:
+ emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ emit_kil(c, args[0]);
+ break;
+
+ case TGSI_OPCODE_KILP:
+ emit_killp(c);
+ break;
+
+ default:
+ debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
+ inst->opcode,
+ tgsi_get_opcode_info(inst->opcode)->mnemonic);
+ }
+
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i] && inst->dst[i]->spill_slot)
+ emit_spill(c,
+ inst->dst[i]->hw_reg,
+ inst->dst[i]->spill_slot);
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("wm-native:\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
new file mode 100644
index 00000000000..9c5b527f897
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -0,0 +1,1224 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
+
+#include "brw_wm.h"
+#include "brw_util.h"
+#include "brw_debug.h"
+
+
+/***********************************************************************
+ * Source regs
+ */
+
+static struct brw_fp_src src_reg(GLuint file, GLuint idx)
+{
+ struct brw_fp_src reg;
+ reg.file = file;
+ reg.index = idx;
+ reg.swizzle = BRW_SWIZZLE_XYZW;
+ reg.indirect = 0;
+ reg.negate = 0;
+ reg.abs = 0;
+ return reg;
+}
+
+static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
+{
+ return src_reg(dst.file, dst.index);
+}
+
+static struct brw_fp_src src_undef( void )
+{
+ return src_reg(TGSI_FILE_NULL, 0);
+}
+
+static GLboolean src_is_undef(struct brw_fp_src src)
+{
+ return src.file == TGSI_FILE_NULL;
+}
+
+static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
+{
+ unsigned swz = reg.swizzle;
+
+ reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
+ BRW_GET_SWZ(swz, y) << 2 |
+ BRW_GET_SWZ(swz, z) << 4 |
+ BRW_GET_SWZ(swz, w) << 6 );
+
+ return reg;
+}
+
+static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
+{
+ return src_swizzle(reg, x, x, x, x);
+}
+
+static struct brw_fp_src src_abs( struct brw_fp_src src )
+{
+ src.negate = 0;
+ src.abs = 1;
+ return src;
+}
+
+static struct brw_fp_src src_negate( struct brw_fp_src src )
+{
+ src.negate = 1;
+ src.abs = 0;
+ return src;
+}
+
+
+static int match_or_expand_immediate( const float *v,
+ unsigned nr,
+ float *v2,
+ unsigned *nr2,
+ unsigned *swizzle )
+{
+ unsigned i, j;
+
+ *swizzle = 0;
+
+ for (i = 0; i < nr; i++) {
+ boolean found = FALSE;
+
+ for (j = 0; j < *nr2 && !found; j++) {
+ if (v[i] == v2[j]) {
+ *swizzle |= j << (i * 2);
+ found = TRUE;
+ }
+ }
+
+ if (!found) {
+ if (*nr2 >= 4)
+ return FALSE;
+
+ v2[*nr2] = v[i];
+ *swizzle |= *nr2 << (i * 2);
+ (*nr2)++;
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+/* Internally generated immediates: overkill...
+ */
+static struct brw_fp_src src_imm( struct brw_wm_compile *c,
+ const GLfloat *v,
+ unsigned nr)
+{
+ unsigned i, j;
+ unsigned swizzle;
+
+ /* Could do a first pass where we examine all existing immediates
+ * without expanding.
+ */
+
+ for (i = 0; i < c->nr_immediates; i++) {
+ if (match_or_expand_immediate( v,
+ nr,
+ c->immediate[i].v,
+ &c->immediate[i].nr,
+ &swizzle ))
+ goto out;
+ }
+
+ if (c->nr_immediates < Elements(c->immediate)) {
+ i = c->nr_immediates++;
+ if (match_or_expand_immediate( v,
+ nr,
+ c->immediate[i].v,
+ &c->immediate[i].nr,
+ &swizzle ))
+ goto out;
+ }
+
+ c->error = 1;
+ return src_undef();
+
+out:
+ /* Make sure that all referenced elements are from this immediate.
+ * Has the effect of making size-one immediates into scalars.
+ */
+ for (j = nr; j < 4; j++)
+ swizzle |= (swizzle & 0x3) << (j * 2);
+
+ return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
+ BRW_GET_SWZ(swizzle, X),
+ BRW_GET_SWZ(swizzle, Y),
+ BRW_GET_SWZ(swizzle, Z),
+ BRW_GET_SWZ(swizzle, W) );
+}
+
+
+
+static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+ GLfloat f )
+{
+ return src_imm(c, &f, 1);
+}
+
+static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+ GLfloat x,
+ GLfloat y,
+ GLfloat z,
+ GLfloat w)
+{
+ GLfloat f[4] = {x,y,z,w};
+ return src_imm(c, f, 4);
+}
+
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
+{
+ struct brw_fp_dst reg;
+ reg.file = file;
+ reg.index = idx;
+ reg.writemask = BRW_WRITEMASK_XYZW;
+ reg.indirect = 0;
+ reg.saturate = 0;
+ return reg;
+}
+
+static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
+{
+ reg.writemask &= mask;
+ return reg;
+}
+
+static struct brw_fp_dst dst_undef( void )
+{
+ return dst_reg(TGSI_FILE_NULL, 0);
+}
+
+static boolean dst_is_undef( struct brw_fp_dst dst )
+{
+ return dst.file == TGSI_FILE_NULL;
+}
+
+static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+{
+ reg.saturate = flag;
+ return reg;
+}
+
+static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
+{
+ int bit = ffs( ~c->fp_temp );
+
+ if (!bit) {
+ debug_printf("%s: out of temporaries\n", __FILE__);
+ }
+
+ c->fp_temp |= 1<<(bit-1);
+ return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
+}
+
+
+static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
+{
+ c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
+}
+
+
+/***********************************************************************
+ * Instructions
+ */
+
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+ return &c->fp_instructions[c->nr_fp_insns++];
+}
+
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+ GLuint op,
+ struct brw_fp_dst dest,
+ GLuint tex_unit,
+ GLuint target,
+ GLuint sampler,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1,
+ struct brw_fp_src src2 )
+{
+ struct brw_fp_instruction *inst = get_fp_inst(c);
+
+ if (tex_unit || target)
+ assert(op == TGSI_OPCODE_TXP ||
+ op == TGSI_OPCODE_TXB ||
+ op == TGSI_OPCODE_TEX ||
+ op == WM_FB_WRITE);
+
+ inst->opcode = op;
+ inst->dst = dest;
+ inst->tex_unit = tex_unit;
+ inst->target = target;
+ inst->sampler = sampler;
+ inst->src[0] = src0;
+ inst->src[1] = src1;
+ inst->src[2] = src2;
+
+ return inst;
+}
+
+
+static INLINE void emit_op3(struct brw_wm_compile *c,
+ GLuint op,
+ struct brw_fp_dst dest,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1,
+ struct brw_fp_src src2 )
+{
+ emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
+}
+
+
+static INLINE void emit_op2(struct brw_wm_compile *c,
+ GLuint op,
+ struct brw_fp_dst dest,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1)
+{
+ emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
+}
+
+static INLINE void emit_op1(struct brw_wm_compile *c,
+ GLuint op,
+ struct brw_fp_dst dest,
+ struct brw_fp_src src0)
+{
+ emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
+}
+
+static INLINE void emit_op0(struct brw_wm_compile *c,
+ GLuint op,
+ struct brw_fp_dst dest)
+{
+ emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
+}
+
+
+
+/* Many opcodes produce the same value across all the result channels.
+ * We'd rather not have to support that splatting in the opcode implementations,
+ * and brw_wm_pass*.c wants to optimize them out by shuffling references around
+ * anyway. We can easily get both by emitting the opcode to one channel, and
+ * then MOVing it to the others, which brw_wm_pass*.c already understands.
+ */
+static void emit_scalar_insn(struct brw_wm_compile *c,
+ unsigned opcode,
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1,
+ struct brw_fp_src src2 )
+{
+ unsigned first_chan = ffs(dst.writemask) - 1;
+ unsigned first_mask = 1 << first_chan;
+
+ if (dst.writemask == 0)
+ return;
+
+ emit_op3( c, opcode,
+ dst_mask(dst, first_mask),
+ src0, src1, src2 );
+
+ if (dst.writemask != first_mask) {
+ emit_op1(c, TGSI_OPCODE_MOV,
+ dst_mask(dst, ~first_mask),
+ src_scalar(src_reg_from_dst(dst), first_chan));
+ }
+}
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->fp_pixel_xy)) {
+ struct brw_fp_dst pixel_xy = get_temp(c);
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+
+
+ /* Emit the out calculations, and hold onto the results. Use
+ * two instructions as a temporary is required.
+ */
+ /* pixel_xy.xy = PIXELXY payload[0];
+ */
+ emit_op1(c,
+ WM_PIXELXY,
+ dst_mask(pixel_xy, BRW_WRITEMASK_XY),
+ payload_r0_depth);
+
+ c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
+ }
+
+ return c->fp_pixel_xy;
+}
+
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->fp_delta_xy)) {
+ struct brw_fp_dst delta_xy = get_temp(c);
+ struct brw_fp_src pixel_xy = get_pixel_xy(c);
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+
+ /* deltas.xy = DELTAXY pixel_xy, payload[0]
+ */
+ emit_op3(c,
+ WM_DELTAXY,
+ dst_mask(delta_xy, BRW_WRITEMASK_XY),
+ pixel_xy,
+ payload_r0_depth,
+ src_undef());
+
+ c->fp_delta_xy = src_reg_from_dst(delta_xy);
+ }
+
+ return c->fp_delta_xy;
+}
+
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->fp_pixel_w)) {
+ struct brw_fp_dst pixel_w = get_temp(c);
+ struct brw_fp_src deltas = get_delta_xy(c);
+
+ /* XXX: assuming position is always first -- valid?
+ */
+ struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
+
+ /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
+ */
+ emit_op3(c,
+ WM_PIXELW,
+ dst_mask(pixel_w, BRW_WRITEMASK_W),
+ interp_wpos,
+ deltas,
+ src_undef());
+
+
+ c->fp_pixel_w = src_reg_from_dst(pixel_w);
+ }
+
+ return c->fp_pixel_w;
+}
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+static void emit_interp( struct brw_wm_compile *c,
+ GLuint idx,
+ GLuint semantic,
+ GLuint interp_mode )
+{
+ struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+ struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+ struct brw_fp_src deltas = get_delta_xy(c);
+
+ /* Need to use PINTERP on attributes which have been
+ * multiplied by 1/W in the SF program, and LINTERP on those
+ * which have not:
+ */
+ switch (semantic) {
+ case TGSI_SEMANTIC_POSITION:
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op1(c,
+ WM_WPOSXY,
+ dst_mask(dst, BRW_WRITEMASK_XY),
+ get_pixel_xy(c));
+
+ /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op2(c,
+ WM_LINTERP,
+ dst_mask(dst, BRW_WRITEMASK_ZW),
+ interp,
+ deltas);
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ if (c->key.flat_shade) {
+ emit_op1(c,
+ WM_CINTERP,
+ dst,
+ interp);
+ }
+ else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
+ emit_op2(c,
+ WM_LINTERP,
+ dst,
+ interp,
+ deltas);
+ }
+ else {
+ emit_op3(c,
+ WM_PINTERP,
+ dst,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ }
+
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ /* Interpolate the fog coordinate */
+ emit_op3(c,
+ WM_PINTERP,
+ dst_mask(dst, BRW_WRITEMASK_X),
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src_imm1f(c, 0.0));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0));
+ break;
+
+ case TGSI_SEMANTIC_FACE:
+ /* XXX review/test this case */
+ emit_op0(c,
+ WM_FRONTFACING,
+ dst_mask(dst, BRW_WRITEMASK_X));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src_imm1f(c, 0.0));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0));
+ break;
+
+ case TGSI_SEMANTIC_PSIZE:
+ /* XXX review/test this case */
+ emit_op3(c,
+ WM_PINTERP,
+ dst_mask(dst, BRW_WRITEMASK_XY),
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_Z),
+ src_imm1f(c, 0.0f));
+
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0f));
+ break;
+
+ default:
+ switch (interp_mode) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_op1(c,
+ WM_CINTERP,
+ dst,
+ interp);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_op2(c,
+ WM_LINTERP,
+ dst,
+ interp,
+ deltas);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ emit_op3(c,
+ WM_PINTERP,
+ dst,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ break;
+ }
+ break;
+ }
+}
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1 )
+{
+ if (dst.writemask & BRW_WRITEMASK_Y) {
+ /* dst.y = mul src0.y, src1.y
+ */
+ emit_op2(c,
+ TGSI_OPCODE_MUL,
+ dst_mask(dst, BRW_WRITEMASK_Y),
+ src0,
+ src1);
+ }
+
+ if (dst.writemask & BRW_WRITEMASK_XZ) {
+ /* dst.z = mov src0.zzzz
+ */
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_Z),
+ src_scalar(src0, Z));
+
+ /* dst.x = imm1f(1.0)
+ */
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+ src_imm1f(c, 1.0));
+ }
+ if (dst.writemask & BRW_WRITEMASK_W) {
+ /* dst.w = mov src1.w
+ */
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src1);
+ }
+}
+
+
+static void precalc_lit( struct brw_wm_compile *c,
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0 )
+{
+ if (dst.writemask & BRW_WRITEMASK_XW) {
+ /* dst.xw = imm(1.0f)
+ */
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
+ src_imm1f(c, 1.0f));
+ }
+
+ if (dst.writemask & BRW_WRITEMASK_YZ) {
+ emit_op1(c,
+ TGSI_OPCODE_LIT,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src0);
+ }
+}
+
+
+/**
+ * Some TEX instructions require extra code, cube map coordinate
+ * normalization, or coordinate scaling for RECT textures, etc.
+ * This function emits those extra instructions and the TEX
+ * instruction itself.
+ */
+static void precalc_tex( struct brw_wm_compile *c,
+ struct brw_fp_dst dst,
+ unsigned target,
+ unsigned unit,
+ struct brw_fp_src src0,
+ struct brw_fp_src sampler )
+{
+ struct brw_fp_src coord = src_undef();
+ struct brw_fp_dst tmp = dst_undef();
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ /* Cubemap: find longest component of coord vector and normalize
+ * it.
+ */
+ if (target == TGSI_TEXTURE_CUBE) {
+ struct brw_fp_src tmpsrc;
+
+ tmp = get_temp(c);
+ tmpsrc = src_reg_from_dst(tmp);
+
+ /* tmp = abs(src0) */
+ emit_op1(c,
+ TGSI_OPCODE_MOV,
+ tmp,
+ src_abs(src0));
+
+ /* tmp.X = MAX(tmp.X, tmp.Y) */
+ emit_op2(c, TGSI_OPCODE_MAX,
+ dst_mask(tmp, BRW_WRITEMASK_X),
+ src_scalar(tmpsrc, X),
+ src_scalar(tmpsrc, Y));
+
+ /* tmp.X = MAX(tmp.X, tmp.Z) */
+ emit_op2(c, TGSI_OPCODE_MAX,
+ dst_mask(tmp, BRW_WRITEMASK_X),
+ tmpsrc,
+ src_scalar(tmpsrc, Z));
+
+ /* tmp.X = 1 / tmp.X */
+ emit_op1(c, TGSI_OPCODE_RCP,
+ dst_mask(tmp, BRW_WRITEMASK_X),
+ tmpsrc);
+
+ /* tmp = src0 * tmp.xxxx */
+ emit_op2(c, TGSI_OPCODE_MUL,
+ tmp,
+ src0,
+ src_scalar(tmpsrc, X));
+
+ coord = tmpsrc;
+ }
+ else if (target == TGSI_TEXTURE_RECT ||
+ target == TGSI_TEXTURE_SHADOWRECT) {
+ /* XXX: need a mechanism for internally generated constants.
+ */
+ coord = src0;
+ }
+ else {
+ coord = src0;
+ }
+
+ /* Need to emit YUV texture conversions by hand. Probably need to
+ * do this here - the alternative is in brw_wm_emit.c, but the
+ * conversion requires allocating a temporary variable which we
+ * don't have the facility to do that late in the compilation.
+ */
+ if (c->key.yuvtex_mask & (1 << unit)) {
+ /* convert ycbcr to RGBA */
+ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
+ struct brw_fp_dst tmp = get_temp(c);
+ struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
+ struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
+ struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
+
+ /* tmp = TEX ...
+ */
+ emit_tex_op(c,
+ TGSI_OPCODE_TEX,
+ dst_saturate(tmp, dst.saturate),
+ unit,
+ target,
+ sampler.index,
+ coord,
+ src_undef(),
+ src_undef());
+
+ /* tmp.xyz = ADD TMP, C0
+ */
+ emit_op2(c, TGSI_OPCODE_ADD,
+ dst_mask(tmp, BRW_WRITEMASK_XYZ),
+ tmpsrc,
+ C0);
+
+ /* YUV.y = MUL YUV.y, C0.w
+ */
+ emit_op2(c, TGSI_OPCODE_MUL,
+ dst_mask(tmp, BRW_WRITEMASK_Y),
+ tmpsrc,
+ src_scalar(C0, W));
+
+ /*
+ * if (UV swaped)
+ * RGB.xyz = MAD YUV.zzx, C1, YUV.y
+ * else
+ * RGB.xyz = MAD YUV.xxz, C1, YUV.y
+ */
+
+ emit_op3(c, TGSI_OPCODE_MAD,
+ dst_mask(dst, BRW_WRITEMASK_XYZ),
+ ( swap_uv ?
+ src_swizzle(tmpsrc, Z,Z,X,X) :
+ src_swizzle(tmpsrc, X,X,Z,Z)),
+ C1,
+ src_scalar(tmpsrc, Y));
+
+ /* RGB.y = MAD YUV.z, C1.w, RGB.y
+ */
+ emit_op3(c,
+ TGSI_OPCODE_MAD,
+ dst_mask(dst, BRW_WRITEMASK_Y),
+ src_scalar(tmpsrc, Z),
+ src_scalar(C1, W),
+ src_scalar(src_reg_from_dst(dst), Y));
+
+ release_temp(c, tmp);
+ }
+ else {
+ /* ordinary RGBA tex instruction */
+ emit_tex_op(c,
+ TGSI_OPCODE_TEX,
+ dst,
+ unit,
+ target,
+ sampler.index,
+ coord,
+ src_undef(),
+ src_undef());
+ }
+
+ /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
+ * generating shader varients in mesa state tracker.
+ */
+
+ /* Release this temp if we ended up allocating it:
+ */
+ if (!dst_is_undef(tmp))
+ release_temp(c, tmp);
+}
+
+
+/**
+ * Check if the given TXP instruction really needs the divide-by-W step.
+ */
+static GLboolean projtex( struct brw_wm_compile *c,
+ unsigned target,
+ struct brw_fp_src src )
+{
+ /* Only try to detect the simplest cases. Could detect (later)
+ * cases where we are trying to emit code like RCP {1.0}, MUL x,
+ * {1.0}, and so on.
+ *
+ * More complex cases than this typically only arise from
+ * user-provided fragment programs anyway:
+ */
+ if (target == TGSI_TEXTURE_CUBE)
+ return GL_FALSE; /* ut2004 gun rendering !?! */
+
+ if (src.file == TGSI_FILE_INPUT &&
+ BRW_GET_SWZ(src.swizzle, W) == W &&
+ c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Emit code for TXP.
+ */
+static void precalc_txp( struct brw_wm_compile *c,
+ struct brw_fp_dst dst,
+ unsigned target,
+ unsigned unit,
+ struct brw_fp_src src0,
+ struct brw_fp_src sampler )
+{
+ if (projtex(c, target, src0)) {
+ struct brw_fp_dst tmp = get_temp(c);
+
+ /* tmp0.w = RCP inst.arg[0][3]
+ */
+ emit_op1(c,
+ TGSI_OPCODE_RCP,
+ dst_mask(tmp, BRW_WRITEMASK_W),
+ src_scalar(src0, W));
+
+ /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
+ */
+ emit_op2(c,
+ TGSI_OPCODE_MUL,
+ dst_mask(tmp, BRW_WRITEMASK_XYZ),
+ src0,
+ src_scalar(src_reg_from_dst(tmp), W));
+
+ /* dst = TEX tmp0
+ */
+ precalc_tex(c,
+ dst,
+ target,
+ unit,
+ src_reg_from_dst(tmp),
+ sampler );
+
+ release_temp(c, tmp);
+ }
+ else
+ {
+ /* dst = TEX src0
+ */
+ precalc_tex(c, dst, target, unit, src0, sampler);
+ }
+}
+
+
+/* XXX: note this returns a src_reg.
+ */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+ unsigned semantic,
+ unsigned index )
+{
+ const struct tgsi_shader_info *info = &c->fp->info;
+ unsigned i;
+
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == semantic &&
+ info->output_semantic_index[i] == index)
+ return src_reg( TGSI_FILE_OUTPUT, i );
+
+ /* If not found, return some arbitrary immediate value:
+ *
+ * XXX: this is a good idea but immediates are up generating extra
+ * curbe entries atm, as they would have in the original driver.
+ */
+ return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
+}
+
+
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+ struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
+ GLuint i;
+
+
+ outdepth = src_scalar(outdepth, Z);
+
+ for (i = 0 ; i < c->key.nr_cbufs; i++) {
+ struct brw_fp_src outcolor;
+
+ outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+
+ /* Use emit_tex_op so that we can specify the inst->target
+ * field, which is abused to contain the FB write target and the
+ * EOT marker
+ */
+ emit_tex_op(c, WM_FB_WRITE,
+ dst_undef(),
+ (i == c->key.nr_cbufs - 1), /* EOT */
+ i,
+ 0, /* no sampler */
+ outcolor,
+ payload_r0_depth,
+ outdepth);
+ }
+}
+
+
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+ const struct tgsi_full_dst_register *dst,
+ unsigned saturate )
+{
+ struct brw_fp_dst out;
+
+ out.file = dst->Register.File;
+ out.index = dst->Register.Index;
+ out.writemask = dst->Register.WriteMask;
+ out.indirect = dst->Register.Indirect;
+ out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
+
+ if (out.indirect) {
+ assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
+ assert(dst->Indirect.Index == 0);
+ }
+
+ return out;
+}
+
+
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+ const struct tgsi_full_src_register *src )
+{
+ struct brw_fp_src out;
+
+ out.file = src->Register.File;
+ out.index = src->Register.Index;
+ out.indirect = src->Register.Indirect;
+
+ out.swizzle = ((src->Register.SwizzleX << 0) |
+ (src->Register.SwizzleY << 2) |
+ (src->Register.SwizzleZ << 4) |
+ (src->Register.SwizzleW << 6));
+
+ switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ out.abs = 1;
+ out.negate = 0;
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ out.abs = 1;
+ out.negate = 1;
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ out.abs = 0;
+ out.negate = 1;
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ default:
+ out.abs = 0;
+ out.negate = 0;
+ break;
+ }
+
+ if (out.indirect) {
+ assert(src->Indirect.File == TGSI_FILE_ADDRESS);
+ assert(src->Indirect.Index == 0);
+ }
+
+ return out;
+}
+
+
+
+static void emit_insn( struct brw_wm_compile *c,
+ const struct tgsi_full_instruction *inst )
+{
+ unsigned opcode = inst->Instruction.Opcode;
+ struct brw_fp_dst dst;
+ struct brw_fp_src src[3];
+ int i;
+
+ dst = translate_dst( c, &inst->Dst[0],
+ inst->Instruction.Saturate );
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
+ src[i] = translate_src( c, &inst->Src[i] );
+
+ switch (opcode) {
+ case TGSI_OPCODE_ABS:
+ emit_op1(c, TGSI_OPCODE_MOV,
+ dst,
+ src_abs(src[0]));
+ break;
+
+ case TGSI_OPCODE_SUB:
+ emit_op2(c, TGSI_OPCODE_ADD,
+ dst,
+ src[0],
+ src_negate(src[1]));
+ break;
+
+ case TGSI_OPCODE_SCS:
+ emit_op1(c, TGSI_OPCODE_SCS,
+ dst_mask(dst, BRW_WRITEMASK_XY),
+ src[0]);
+ break;
+
+ case TGSI_OPCODE_DST:
+ precalc_dst(c, dst, src[0], src[1]);
+ break;
+
+ case TGSI_OPCODE_LIT:
+ precalc_lit(c, dst, src[0]);
+ break;
+
+ case TGSI_OPCODE_TEX:
+ precalc_tex(c, dst,
+ inst->Texture.Texture,
+ src[1].index, /* use sampler unit for tex idx */
+ src[0], /* coord */
+ src[1]); /* sampler */
+ break;
+
+ case TGSI_OPCODE_TXP:
+ precalc_txp(c, dst,
+ inst->Texture.Texture,
+ src[1].index, /* use sampler unit for tex idx */
+ src[0], /* coord */
+ src[1]); /* sampler */
+ break;
+
+ case TGSI_OPCODE_TXB:
+ /* XXX: TXB not done
+ */
+ precalc_tex(c, dst,
+ inst->Texture.Texture,
+ src[1].index, /* use sampler unit for tex idx*/
+ src[0],
+ src[1]);
+ break;
+
+ case TGSI_OPCODE_XPD:
+ emit_op2(c, TGSI_OPCODE_XPD,
+ dst_mask(dst, BRW_WRITEMASK_XYZ),
+ src[0],
+ src[1]);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ emit_op1(c, TGSI_OPCODE_KIL,
+ dst_mask(dst_undef(), 0),
+ src[0]);
+ break;
+
+ case TGSI_OPCODE_END:
+ emit_fb_write(c);
+ break;
+ default:
+ if (!c->key.has_flow_control &&
+ brw_wm_is_scalar_result(opcode))
+ emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
+ else
+ emit_op3(c, opcode, dst, src[0], src[1], src[2]);
+ break;
+ }
+}
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ */
+int brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+ struct brw_fragment_shader *fs = c->fp;
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *inst;
+ struct tgsi_full_declaration *decl;
+ const float *imm;
+ GLuint size;
+ GLuint i;
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("pre-fp:\n");
+ tgsi_dump(fs->tokens, 0);
+ }
+
+ c->fp_pixel_xy = src_undef();
+ c->fp_delta_xy = src_undef();
+ c->fp_pixel_w = src_undef();
+ c->nr_fp_insns = 0;
+ c->nr_immediates = 0;
+
+
+ /* Loop over all instructions doing assorted simplifications and
+ * transformations.
+ */
+ tgsi_parse_init( &parse, fs->tokens );
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* Turn intput declarations into special WM_* instructions.
+ *
+ * XXX: For non-branching shaders, consider deferring variable
+ * initialization as late as possible to minimize register
+ * usage. This is how the original BRW driver worked.
+ *
+ * In a branching shader, must preamble instructions at decl
+ * time, as instruction order in the shader does not
+ * correspond to the order instructions are executed in the
+ * wild.
+ *
+ * This is where special instructions such as WM_CINTERP,
+ * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+ * compute shader inputs from the payload registers and pixel
+ * position.
+ */
+ decl = &parse.FullToken.FullDeclaration;
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ unsigned first, last, mask;
+ unsigned attrib;
+
+ first = decl->Range.First;
+ last = decl->Range.Last;
+ mask = decl->Declaration.UsageMask;
+
+ for (attrib = first; attrib <= last; attrib++) {
+ emit_interp(c,
+ attrib,
+ decl->Semantic.Name,
+ decl->Declaration.Interpolate );
+ }
+ }
+
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* Unlike VS programs we can probably manage fine encoding
+ * immediate values directly into the emitted EU
+ * instructions, as we probably only need to reference one
+ * float value per instruction. Just save the data for now
+ * and use directly later.
+ */
+ i = c->nr_immediates++;
+ imm = &parse.FullToken.FullImmediate.u[i].Float;
+ size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+ if (c->nr_immediates >= BRW_WM_MAX_CONST)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ for (i = 0; i < size; i++)
+ c->immediate[c->nr_immediates].v[i] = imm[i];
+
+ for (; i < 4; i++)
+ c->immediate[c->nr_immediates].v[i] = 0.0;
+
+ c->immediate[c->nr_immediates].nr = size;
+ c->nr_immediates++;
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ inst = &parse.FullToken.FullInstruction;
+ emit_insn(c, inst);
+ break;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_fp_program( c, "pass_fp" );
+ debug_printf("\n");
+ }
+
+ return c->error;
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
new file mode 100644
index 00000000000..3b3afc39d3c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -0,0 +1,2032 @@
+#include "util/u_math.h"
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint component);
+
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ debug_printf("%s: ran out of registers for fragment program", __FUNCTION__);
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
+/**
+ * Record the mapping of a Mesa register to a hardware register.
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+ int component, struct brw_reg reg)
+{
+ c->wm_regs[file][index][component].reg = reg;
+ c->wm_regs[file][index][component].inited = GL_TRUE;
+}
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ struct brw_reg reg;
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
+ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
+ return reg;
+
+}
+
+/**
+ * Save current temp register info.
+ * There must be a matching call to release_tmps().
+ */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+ return c->tmp_index;
+}
+
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+ return brw_vec8_grf( c->tmp_regs[ index ], 0 );
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+ c->tmp_index = mark;
+}
+
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers. We allocate the hardware registers as needed here.
+ *
+ * \param file register file, one of PROGRAM_x
+ * \param index register number
+ * \param component src component (X=0, Y=1, Z=2, W=3)
+ * \param nr not used?!?
+ * \param neg negate value?
+ * \param abs take absolute value?
+ */
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+ int nr, GLuint neg, GLuint abs)
+{
+ struct brw_reg reg;
+ switch (file) {
+ case TGSI_FILE_NULL:
+ return brw_null_reg();
+
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case BRW_FILE_PAYLOAD:
+ break;
+
+ default:
+ debug_printf("%s: Unexpected file type\n", __FUNCTION__);
+ return brw_null_reg();
+ }
+
+ assert(index < 256);
+ assert(component < 4);
+
+ /* see if we've already allocated a HW register for this Mesa register */
+ if (c->wm_regs[file][index][component].inited) {
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
+ }
+ else {
+ /* no, allocate new register */
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
+
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
+
+ set_reg(c, file, index, component, reg);
+ }
+
+ if (neg & (1 << component)) {
+ reg = negate(reg);
+ }
+ if (abs)
+ reg = brw_abs(reg);
+ return reg;
+}
+
+
+
+
+/**
+ * Find first/last instruction that references each temporary register.
+ */
+GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+ GLuint numInstructions,
+ GLint intBegin[MAX_PROGRAM_TEMPS],
+ GLint intEnd[MAX_PROGRAM_TEMPS])
+{
+ struct loop_info
+ {
+ GLuint Start, End; /**< Start, end instructions of loop */
+ };
+ struct loop_info loopStack[MAX_LOOP_NESTING];
+ GLuint loopStackDepth = 0;
+ GLuint i;
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ intBegin[i] = intEnd[i] = -1;
+ }
+
+ /* Scan instructions looking for temporary registers */
+ for (i = 0; i < numInstructions; i++) {
+ const struct prog_instruction *inst = instructions + i;
+ if (inst->Opcode == OPCODE_BGNLOOP) {
+ loopStack[loopStackDepth].Start = i;
+ loopStack[loopStackDepth].End = inst->BranchTarget;
+ loopStackDepth++;
+ }
+ else if (inst->Opcode == OPCODE_ENDLOOP) {
+ loopStackDepth--;
+ }
+ else if (inst->Opcode == OPCODE_CAL) {
+ return GL_FALSE;
+ }
+ else {
+ const GLuint numSrc = 3;
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->SrcReg[j].Index;
+ if (inst->SrcReg[j].RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, index, i);
+ if (loopStackDepth > 0) {
+ /* extend temp register's interval to end of loop */
+ GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+ update_interval(intBegin, intEnd, index, loopEnd);
+ }
+ }
+ }
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->DstReg.Index;
+ if (inst->DstReg.RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, index, i);
+ if (loopStackDepth > 0) {
+ /* extend temp register's interval to end of loop */
+ GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+ update_interval(intBegin, intEnd, index, loopEnd);
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[BRW_WM_MAX_TEMPS];
+ GLint intEnd[BRW_WM_MAX_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < BRW_WM_MAX_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ struct brw_reg reg;
+ int urb_read_length = 0;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
+ set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+ }
+ reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
+ {
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+ /* use a real constant buffer, or just use a section of the GRF? */
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+ if (c->fp->use_const_buffer) {
+ /* We'll use a real constant buffer and fetch constants from
+ * it with a dataport read message.
+ */
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+ }
+ else {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ reg_index += c->nr_creg;
+ }
+ }
+
+ /* fragment shader inputs */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.nr_vp_outputs > i) {
+ reg_index += 2;
+ }
+ }
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
+ c->prog_data.curb_read_length = c->nr_creg;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* An instruction may reference up to three constants.
+ * They'll be found in these registers.
+ * XXX alloc these on demand!
+ */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * Check if any of the instruction's src registers are constants, uniforms,
+ * or statevars. If so, fetch any constants that we don't already have in
+ * the three GRF slots.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ /* loop over instruction src regs */
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ if (src->File == TGSI_FILE_IMMEDIATE ||
+ src->File == TGSI_FILE_CONSTANT) {
+ c->current_const[i].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
+ }
+ }
+}
+
+
+/**
+ * Convert Mesa dst register to brw register.
+ */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint component)
+{
+ const int nr = 1;
+ return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
+ 0, 0);
+}
+
+
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint srcRegIndex, GLuint component)
+{
+ /* We should have already fetched the constant from the constant
+ * buffer in fetch_constants(). Now we just have to return a
+ * register description that extracts the needed component and
+ * smears it across all eight vector components.
+ */
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ struct brw_reg const_reg;
+
+ assert(component < 4);
+ assert(srcRegIndex < 3);
+ assert(c->current_const[srcRegIndex].index != -1);
+ const_reg = c->current_const[srcRegIndex].reg;
+
+ /* extract desired float from the const_reg, and smear */
+ const_reg = stride(const_reg, 0, 1, 0);
+ const_reg.subnr = component * 4;
+
+ if (src->Negate)
+ const_reg = negate(const_reg);
+ if (src->Abs)
+ const_reg = brw_abs(const_reg);
+
+#if 0
+ printf(" form const[%d].%d for arg %d, reg %d\n",
+ c->current_const[srcRegIndex].index,
+ component,
+ srcRegIndex,
+ const_reg.nr);
+#endif
+
+ return const_reg;
+}
+
+
+/**
+ * Convert Mesa src register to brw register.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ const GLuint nr = 1;
+ const GLuint component = BRW_GET_SWZ(src->Swizzle, channel);
+
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
+ if (c->fp->use_const_buffer &&
+ (src->File == TGSI_FILE_STATE_VAR ||
+ src->File == TGSI_FILE_CONSTANT ||
+ src->File == TGSI_FILE_UNIFORM)) {
+ return get_src_reg_const(c, inst, srcRegIndex, component);
+ }
+ else {
+ /* other type of source register */
+ return get_reg(c, src->File, src->Index, component, nr,
+ src->Negate, src->Abs);
+ }
+}
+
+
+/**
+ * Same as \sa get_src_reg() but if the register is a immediate, emit
+ * a brw_reg encoding the immediate.
+ * Note that a brw instruction only allows one src operand to be a immediate.
+ * For instructions with more than one operand, only the second can be a
+ * immediate. This means that we treat some immediates as constants
+ * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()).
+ *
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ if (src->File == TGSI_FILE_IMMEDIATE) {
+ /* an immediate */
+ const int component = BRW_GET_SWZ(src->Swizzle, channel);
+ const GLfloat *param =
+ c->fp->program.Base.Parameters->ParameterValues[src->Index];
+ GLfloat value = param[component];
+ if (src->Negate)
+ value = -value;
+ if (src->Abs)
+ value = FABSF(value);
+#if 0
+ printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+ return brw_imm_f(value);
+ }
+ else {
+ return get_src_reg(c, inst, srcRegIndex, channel);
+ }
+}
+
+
+/**
+ * Subroutines are minimal support for resusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) );
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn;
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address );
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) );
+
+ release_tmps( c, mark );
+ }
+}
+
+static void emit_trunc( struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, inst, 0, i);
+ brw_RNDZ(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_mov( struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ /* XXX some moves from immediate value don't work reliably!!! */
+ /*src = get_src_reg_imm(c, inst, 0, i);*/
+ src = get_src_reg(c, inst, 0, i);
+ brw_MOV(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ struct brw_reg dst0, dst1;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+}
+
+static void emit_delta_xy(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg dst0, dst1, src0, src1;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
+ src1 = get_src_reg(c, inst, 0, 1);
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ dst0,
+ retype(src0, BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ dst1,
+ retype(src1, BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ GLuint base_reg,
+ GLuint nr,
+ GLuint target,
+ GLuint eot)
+{
+ struct brw_compile *p = &c->func;
+ /* Pass through control information:
+ */
+ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+ /* Send framebuffer write message: */
+ brw_fb_WRITE(p,
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ target,
+ nr,
+ 0,
+ eot);
+}
+
+static void emit_fb_write(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ int nr = 2;
+ int channel;
+ GLuint target, eot;
+ struct brw_reg src0;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, inst, 0, channel);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
+ }
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
+
+ if (c->key.source_depth_to_render_target) {
+ if (c->key.computes_depth) {
+ src0 = get_src_reg(c, inst, 2, 2);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+ else {
+ src0 = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg) {
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
+ brw_pop_insn_state(p);
+ }
+ else
+ {
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
+ }
+ nr += 2;
+ }
+
+ target = inst->Aux >> 1;
+ eot = inst->Aux & 1;
+ fire_fb_write(c, 0, nr, target, eot);
+}
+
+static void emit_pixel_w( struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ if (mask & WRITEMASK_W) {
+ struct brw_reg dst, src0, delta0, delta1;
+ struct brw_reg interp3;
+
+ dst = get_dst_reg(c, inst, 3);
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+
+ interp3 = brw_vec1_grf(src0.nr+1, 4);
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, delta0);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
+
+ /* Calc w */
+ brw_math_16( p, dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1), delta1);
+ }
+ }
+}
+
+static void emit_cinterp(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, src0;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, suboffset(interp[i],3));
+ }
+ }
+}
+
+static void emit_pinterp(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0, w;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ w = get_src_reg(c, inst, 2, 3);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1),
+ delta1);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+static void emit_frontfacing(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ struct brw_reg dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ }
+ }
+
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+static void emit_xpd(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ for (i = 0; i < 4; i++) {
+ GLuint i2 = (i+2)%3;
+ GLuint i1 = (i+1)%3;
+ if (mask & (1<<i)) {
+ struct brw_reg src0, src1, dst;
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, inst, 0, i2));
+ src1 = get_src_reg_imm(c, inst, 1, i1);
+ brw_MUL(p, brw_null_reg(), src0, src1);
+ src0 = get_src_reg(c, inst, 0, i1);
+ src1 = get_src_reg_imm(c, inst, 1, i2);
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ brw_MAC(p, dst, src0, src1);
+ brw_set_saturate(p, 0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp3(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_reg src0[3], src1[3], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 3; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp4(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dph(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, dst, src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ * Note that the result of the function is smeared across the dest
+ * register's X, Y, Z and W channels (subject to writemasking of course).
+ */
+static void emit_math1(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst, GLuint func)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ /* Get first component of source register */
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg(c, inst, 0, 0);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_math(p,
+ dst,
+ func,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_rcp(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+static void emit_rsq(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+static void emit_sin(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+static void emit_cos(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+static void emit_ex2(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+static void emit_lg2(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+static void emit_add(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_ADD(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_arl(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, addr_reg;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS, 0);
+ src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
+ brw_MOV(p, addr_reg, src0);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_mul(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_MUL(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_frc(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
+ brw_FRC(p, dst, src0);
+ }
+ }
+ if (inst->SaturateMode != SATURATE_OFF)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_flr(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
+ brw_RNDD(p, dst, src0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_min_max(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ const GLuint mask = inst->DstReg.WriteMask;
+ const int mark = mark_tmps(c);
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg real_dst = get_dst_reg(c, inst, i);
+ struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+ struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+ struct brw_reg dst;
+ /* if dst==src0 or dst==src1 we need to use a temp reg */
+ GLboolean use_temp = brw_same_reg(dst, src0) ||
+ brw_same_reg(dst, src1);
+ if (use_temp)
+ dst = alloc_tmp(c);
+ else
+ dst = real_dst;
+
+ /*
+ printf(" Min/max: dst %d src0 %d src1 %d\n",
+ dst.nr, src0.nr, src1.nr);
+ */
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ if (inst->Opcode == OPCODE_MIN)
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ else
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ if (use_temp)
+ brw_MOV(p, real_dst, dst);
+ }
+ }
+ brw_pop_insn_state(p);
+ release_tmps(c, mark);
+}
+
+static void emit_pow(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst, src0, src1;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg_imm(c, inst, 0, 0);
+ src1 = get_src_reg_imm(c, inst, 1, 0);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_MOV(p, brw_message_reg(3), src1);
+
+ brw_math(p,
+ dst,
+ BRW_MATH_FUNCTION_POW,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_lrp(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+ int i;
+ int mark = mark_tmps(c);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+
+ src1 = get_src_reg_imm(c, inst, 1, i);
+
+ if (src1.nr == dst.nr) {
+ tmp1 = alloc_tmp(c);
+ brw_MOV(p, tmp1, src1);
+ } else
+ tmp1 = src1;
+
+ src2 = get_src_reg(c, inst, 2, i);
+ if (src2.nr == dst.nr) {
+ tmp2 = alloc_tmp(c);
+ brw_MOV(p, tmp2, src2);
+ } else
+ tmp2 = src2;
+
+ brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, tmp2);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0, tmp1);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c, mark);
+ }
+}
+
+/**
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ */
+static void emit_kil(struct brw_wm_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth);
+ brw_pop_insn_state(p);
+}
+
+static void emit_mad(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1, src2;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ src2 = get_src_reg_imm(c, inst, 2, i);
+ brw_MUL(p, dst, src0, src1);
+
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, dst, src2);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst, GLuint cond)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), cond, src0, src1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ brw_pop_insn_state(p);
+ }
+ }
+}
+
+static void emit_slt(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+static void emit_seq(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+
+static INLINE struct brw_reg high_words( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
+ 0, 8, 2 );
+}
+
+static INLINE struct brw_reg low_words( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
+}
+
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
+}
+
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
+ 0, 16, 2 );
+}
+
+
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg src0[2], dst[2];
+
+ dst[0] = get_dst_reg(c, inst, 0);
+ dst[1] = get_dst_reg(c, inst, 1);
+
+ src0[0] = get_src_reg(c, inst, 0, 0);
+ src0[1] = get_src_reg(c, inst, 0, 1);
+
+ /* Calculate the pixel offset from window bottom left into destination
+ * X and Y channels.
+ */
+ if (mask & WRITEMASK_X) {
+ /* X' = X */
+ brw_MOV(p,
+ dst[0],
+ retype(src0[0], BRW_REGISTER_TYPE_W));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ /* Y' = height - 1 - Y */
+ brw_ADD(p,
+ dst[1],
+ negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
+ brw_imm_d(c->key.drawable_height - 1));
+ }
+}
+
+/* TODO
+ BIAS on SIMD8 not working yet...
+ */
+static void emit_txb(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ /* Note: tex_unit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->tex_unit;
+ GLuint i;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, inst, 0, i);
+
+ switch (inst->tex_target) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), src[2]);
+ break;
+ default:
+ /* invalid target */
+ abort();
+ }
+ brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+}
+
+
+static void emit_tex(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ /* Note: tex_unit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->tex_unit;
+ GLuint msg_len;
+ GLuint i, nr;
+ GLuint emit;
+ GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, inst, 0, i);
+
+ switch (inst->tex_target) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ default:
+ /* invalid target */
+ abort();
+ }
+ msg_len = 1;
+
+ /* move/load S, T, R coords */
+ for (i = 0; i < nr; i++) {
+ static const GLuint swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) {
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
+ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
+ }
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ shadow ? 6 : 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+
+/**
+ * Resolve subroutine calls after code emit is done.
+ */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+ brw_resolve_cals(&c->func);
+}
+
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
+static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c)
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
+ struct brw_compile *p = &c->func;
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+
+ c->out_of_regs = GL_FALSE;
+
+ prealloc_reg(c);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
+
+ c->cur_inst = i;
+
+#if 0
+ debug_printf("Inst %d: ", i);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* fetch any constants that this instruction needs */
+ if (c->fp->use_const_buffer)
+ fetch_constants(c, inst);
+
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
+ switch (inst->Opcode) {
+ case WM_PIXELXY:
+ emit_pixel_xy(c, inst);
+ break;
+ case WM_DELTAXY:
+ emit_delta_xy(c, inst);
+ break;
+ case WM_PIXELW:
+ emit_pixel_w(c, inst);
+ break;
+ case WM_LINTERP:
+ emit_linterp(c, inst);
+ break;
+ case WM_PINTERP:
+ emit_pinterp(c, inst);
+ break;
+ case WM_CINTERP:
+ emit_cinterp(c, inst);
+ break;
+ case WM_WPOSXY:
+ emit_wpos_xy(c, inst);
+ break;
+ case WM_FB_WRITE:
+ emit_fb_write(c, inst);
+ break;
+ case WM_FRONTFACING:
+ emit_frontfacing(c, inst);
+ break;
+ case OPCODE_ADD:
+ emit_add(c, inst);
+ break;
+ case OPCODE_ARL:
+ emit_arl(c, inst);
+ break;
+ case OPCODE_FRC:
+ emit_frc(c, inst);
+ break;
+ case OPCODE_FLR:
+ emit_flr(c, inst);
+ break;
+ case OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case OPCODE_TRUNC:
+ emit_trunc(c, inst);
+ break;
+ case OPCODE_MOV:
+ emit_mov(c, inst);
+ break;
+ case OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case OPCODE_RCP:
+ emit_rcp(c, inst);
+ break;
+ case OPCODE_RSQ:
+ emit_rsq(c, inst);
+ break;
+ case OPCODE_SIN:
+ emit_sin(c, inst);
+ break;
+ case OPCODE_COS:
+ emit_cos(c, inst);
+ break;
+ case OPCODE_EX2:
+ emit_ex2(c, inst);
+ break;
+ case OPCODE_LG2:
+ emit_lg2(c, inst);
+ break;
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ emit_min_max(c, inst);
+ break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
+ break;
+ case OPCODE_SLT:
+ emit_slt(c, inst);
+ break;
+ case OPCODE_SLE:
+ emit_sle(c, inst);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(c, inst);
+ break;
+ case OPCODE_SGE:
+ emit_sge(c, inst);
+ break;
+ case OPCODE_SEQ:
+ emit_seq(c, inst);
+ break;
+ case OPCODE_SNE:
+ emit_sne(c, inst);
+ break;
+ case OPCODE_MUL:
+ emit_mul(c, inst);
+ break;
+ case OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case OPCODE_KIL_NV:
+ emit_kil(c);
+ break;
+ case OPCODE_IF:
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_ELSE:
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ break;
+ case OPCODE_ENDIF:
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
+ break;
+ case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
+ case OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ case OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ brw_save_call(&c->func, inst->label, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_pop_insn_state(p);
+ break;
+
+ case OPCODE_RET:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+
+ break;
+ case OPCODE_BGNLOOP:
+ /* XXX may need to invalidate the current_constant regs */
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ default:
+ debug_printf("unsupported IR in fragment shader %d\n",
+ inst->Opcode);
+ }
+
+ if (inst->CondUpdate)
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ post_wm_emit(c);
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("wm-native:\n");
+ brw_disasm(stderr, p->store, p->nr_insn);
+ }
+}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control. Other shaders will be compiled with the
+ */
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ if (BRW_DEBUG & DEBUG_WM) {
+ debug_printf("%s:\n", __FUNCTION__);
+ }
+
+ /* initial instruction translation/simplification */
+ brw_wm_pass_fp(c);
+
+ /* actual code generation */
+ brw_wm_emit_branching_shader(brw, c);
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_branching_shader_emit done");
+ }
+
+ c->prog_data.total_grf = num_grf_used(c);
+ c->prog_data.total_scratch = 0;
+}
diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c
new file mode 100644
index 00000000000..6f1e9fcc3c1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_iz.c
@@ -0,0 +1,156 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_wm.h"
+
+
+#undef P /* prompted depth */
+#undef C /* computed */
+#undef N /* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+ GLuint mode:2;
+ GLuint sd_present:1;
+ GLuint sd_to_rt:1;
+ GLuint dd_present:1;
+ GLuint ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 0, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 }
+};
+
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags
+ */
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth,
+ struct brw_wm_prog_key *key )
+{
+ GLuint reg = 2;
+
+ assert (lookup < IZ_BIT_MAX);
+
+ if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+ key->computes_depth = 1;
+
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
+ key->source_depth_reg = reg;
+ reg += 2;
+ }
+
+ if (wm_iz_table[lookup].sd_to_rt)
+ key->source_depth_to_render_target = 1;
+
+ if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+ key->aa_dest_stencil_reg = reg;
+ key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ line_aa == AA_SOMETIMES);
+ reg++;
+ }
+
+ if (wm_iz_table[lookup].dd_present) {
+ key->dest_depth_reg = reg;
+ reg+=2;
+ }
+
+ key->nr_depth_regs = (reg+1)/2;
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
new file mode 100644
index 00000000000..0bacad2b0f0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -0,0 +1,366 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "brw_debug.h"
+#include "brw_wm.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{
+ assert(c->nr_refs < BRW_WM_MAX_REF);
+ return &c->refs[c->nr_refs++];
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{
+ assert(c->nr_refs < BRW_WM_MAX_VREG);
+ return &c->vreg[c->nr_vreg++];
+}
+
+/** return pointer to a newly allocated instruction */
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+ assert(c->nr_insns < BRW_WM_MAX_INSN);
+ return &c->instruction[c->nr_insns++];
+}
+
+/***********************************************************************
+ */
+
+/** Init the "undef" register */
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+ struct brw_wm_ref *ref = &c->undef_ref;
+ ref->value = &c->undef_value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+}
+
+/** Set a FP register to a value */
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ struct brw_wm_value *value )
+{
+ struct brw_wm_ref *ref = get_ref(c);
+ ref->value = value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+ c->pass0_fp_reg[file][idx][component] = ref;
+}
+
+/** Set a FP register to a ref */
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ const struct brw_wm_ref *src_ref )
+{
+ c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
+ unsigned idx,
+ unsigned component)
+{
+ GLuint i = idx * 4 + component;
+
+ if (i >= BRW_WM_MAX_PARAM) {
+ debug_printf("%s: out of params\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+ else {
+ struct brw_wm_ref *ref = get_ref(c);
+
+ c->nr_creg = MAX2(c->nr_creg, (i+16)/16);
+
+ /* Push the offsets into hw_reg. These will be added to the
+ * real register numbers once one is allocated in pass2.
+ */
+ ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
+ ref->value = &c->creg[i/16];
+ ref->insn = 0;
+ ref->prevuse = NULL;
+
+ return ref;
+ }
+}
+
+
+
+
+/* Lookup our internal registers
+ */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component )
+{
+ const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+ if (!ref) {
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_OUTPUT:
+ case BRW_FILE_PAYLOAD:
+ /* should already be done?? */
+ break;
+
+ case TGSI_FILE_CONSTANT:
+ ref = get_param_ref(c,
+ c->fp->info.immediate_count + idx,
+ component);
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ ref = get_param_ref(c,
+ idx,
+ component);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ c->pass0_fp_reg[file][idx][component] = ref;
+ }
+
+ if (!ref)
+ ref = &c->undef_ref;
+
+ return ref;
+}
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+ struct brw_wm_instruction *out,
+ const struct brw_fp_instruction *inst,
+ GLuint writemask )
+{
+ const struct brw_fp_dst dst = inst->dst;
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ out->dst[i] = get_value(c);
+ pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]);
+ }
+ }
+
+ out->writemask = writemask;
+}
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+ struct brw_fp_src src,
+ GLuint i )
+{
+ return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i));
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+ struct brw_fp_src src,
+ GLuint i,
+ struct brw_wm_instruction *insn)
+{
+ const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+ struct brw_wm_ref *newref = get_ref(c);
+
+ newref->value = ref->value;
+ newref->hw_reg = ref->hw_reg;
+
+ if (insn) {
+ newref->insn = insn - c->instruction;
+ newref->prevuse = newref->value->lastuse;
+ newref->value->lastuse = newref;
+ }
+
+ if (src.negate)
+ newref->hw_reg.negate ^= 1;
+
+ if (src.abs) {
+ newref->hw_reg.negate = 0;
+ newref->hw_reg.abs = 1;
+ }
+
+ return newref;
+}
+
+
+static void
+translate_insn(struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst)
+{
+ struct brw_wm_instruction *out = get_instruction(c);
+ GLuint writemask = inst->dst.writemask;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+ GLuint i, j;
+
+ /* Copy some data out of the instruction
+ */
+ out->opcode = inst->opcode;
+ out->saturate = inst->dst.saturate;
+ out->tex_unit = inst->tex_unit;
+ out->target = inst->target;
+
+ /* Nasty hack:
+ */
+ out->eot = (inst->opcode == WM_FB_WRITE &&
+ inst->tex_unit != 0);
+
+
+ /* Args:
+ */
+ for (i = 0; i < nr_args; i++) {
+ for (j = 0; j < 4; j++) {
+ out->src[i][j] = get_new_ref(c, inst->src[i], j, out);
+ }
+ }
+
+ /* Dst:
+ */
+ pass0_set_dst(c, out, inst, writemask);
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+ const struct brw_fp_instruction *inst )
+{
+ const struct brw_fp_dst dst = inst->dst;
+ GLuint writemask = dst.writemask;
+ struct brw_wm_ref *refs[4];
+ GLuint i;
+
+ /* Get the effect of a MOV by manipulating our register table:
+ * First get all refs, then assign refs. This ensures that "in-place"
+ * swizzles such as:
+ * MOV t, t.xxyx
+ * are handled correctly. Previously, these two steps were done in
+ * one loop and the above case was incorrectly handled.
+ */
+ for (i = 0; i < 4; i++) {
+ refs[i] = get_new_ref(c, inst->src[0], i, NULL);
+ }
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1 << i)) {
+ pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]);
+ }
+ }
+}
+
+
+/* Initialize payload "registers".
+ */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
+ pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i,
+ &c->payload.depth[j] );
+ }
+
+ for (i = 0; i < c->key.nr_inputs; i++)
+ pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0,
+ &c->payload.input_interp[i] );
+}
+
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number. Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ *
+ * Translate instructions from our fp_instruction structs to our
+ * internal brw_wm_instruction representation.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+ GLuint insn;
+
+ c->nr_vreg = 0;
+ c->nr_insns = 0;
+
+ pass0_init_undef(c);
+ pass0_init_payload(c);
+
+ for (insn = 0; insn < c->nr_fp_insns; insn++) {
+ const struct brw_fp_instruction *inst = &c->fp_instructions[insn];
+
+ /* Optimize away moves, otherwise emit translated instruction:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_MOV:
+ if (!inst->dst.saturate) {
+ pass0_precalc_mov(c, inst);
+ }
+ else {
+ translate_insn(c, inst);
+ }
+ break;
+ default:
+ translate_insn(c, inst);
+ break;
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass0");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
new file mode 100644
index 00000000000..005747f00ba
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -0,0 +1,292 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_wm.h"
+#include "brw_debug.h"
+
+
+static GLuint get_tracked_mask(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ GLuint i;
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ if (!inst->dst[i]->contributes_to_output) {
+ inst->writemask &= ~(1<<i);
+ inst->dst[i] = 0;
+ }
+ }
+ }
+
+ return inst->writemask;
+}
+
+/* Remove a reference from a value's usage chain.
+ */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+ struct brw_wm_value *value = ref->value;
+
+ if (ref == value->lastuse) {
+ value->lastuse = ref->prevuse;
+ }
+ else {
+ struct brw_wm_ref *i = value->lastuse;
+ while (i->prevuse != ref) i = i->prevuse;
+ i->prevuse = ref->prevuse;
+ }
+}
+
+static void track_arg(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst,
+ GLuint arg,
+ GLuint readmask)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ struct brw_wm_ref *ref = inst->src[arg][i];
+ if (ref) {
+ if (readmask & (1<<i)) {
+ ref->value->contributes_to_output = 1;
+ }
+ else {
+ unlink_ref(ref);
+ inst->src[arg][i] = NULL;
+ }
+ }
+ }
+}
+
+static GLuint get_texcoord_mask( GLuint tex_idx )
+{
+ switch (tex_idx) {
+ case TGSI_TEXTURE_1D:
+ return BRW_WRITEMASK_X;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ return BRW_WRITEMASK_XY;
+ case TGSI_TEXTURE_3D:
+ return BRW_WRITEMASK_XYZ;
+ case TGSI_TEXTURE_CUBE:
+ return BRW_WRITEMASK_XYZ;
+
+ case TGSI_TEXTURE_SHADOW1D:
+ return BRW_WRITEMASK_XZ;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return BRW_WRITEMASK_XYZ;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+/* Step two: Basically this is dead code elimination.
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result. Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+ GLint insn;
+
+ for (insn = c->nr_insns-1; insn >= 0; insn--) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ GLuint writemask;
+ GLuint read0, read1, read2;
+
+ if (inst->opcode == TGSI_OPCODE_KIL) {
+ track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */
+ continue;
+ }
+
+ if (inst->opcode == WM_FB_WRITE) {
+ track_arg(c, inst, 0, BRW_WRITEMASK_XYZW);
+ track_arg(c, inst, 1, BRW_WRITEMASK_XYZW);
+ if (c->key.source_depth_to_render_target &&
+ c->key.computes_depth)
+ track_arg(c, inst, 2, BRW_WRITEMASK_Z);
+ else
+ track_arg(c, inst, 2, 0);
+ continue;
+ }
+
+ /* Lookup all the registers which were written by this
+ * instruction and get a mask of those that contribute to the output:
+ */
+ writemask = get_tracked_mask(c, inst);
+ if (!writemask) {
+ GLuint arg;
+ for (arg = 0; arg < 3; arg++)
+ track_arg(c, inst, arg, 0);
+ continue;
+ }
+
+ read0 = 0;
+ read1 = 0;
+ read2 = 0;
+
+ /* Mark all inputs which contribute to the marked outputs:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_TRUNC:
+ read0 = writemask;
+ break;
+
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ read0 = writemask;
+ read1 = writemask;
+ break;
+
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ read0 = writemask;
+ break;
+
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_LRP:
+ read0 = writemask;
+ read1 = writemask;
+ read2 = writemask;
+ break;
+
+ case TGSI_OPCODE_XPD:
+ if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ;
+ if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ;
+ if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY;
+ read1 = read0;
+ break;
+
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SCS:
+ case WM_CINTERP:
+ case WM_PIXELXY:
+ read0 = BRW_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_POW:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ read0 = get_texcoord_mask(inst->target);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W;
+ break;
+
+ case WM_WPOSXY:
+ read0 = writemask & BRW_WRITEMASK_XY;
+ break;
+
+ case WM_DELTAXY:
+ read0 = writemask & BRW_WRITEMASK_XY;
+ read1 = BRW_WRITEMASK_X;
+ break;
+
+ case WM_PIXELW:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_XY;
+ break;
+
+ case WM_LINTERP:
+ read0 = BRW_WRITEMASK_X;
+ read1 = BRW_WRITEMASK_XY;
+ break;
+
+ case WM_PINTERP:
+ read0 = BRW_WRITEMASK_X; /* interpolant */
+ read1 = BRW_WRITEMASK_XY; /* deltas */
+ read2 = BRW_WRITEMASK_W; /* pixel w */
+ break;
+
+ case TGSI_OPCODE_DP3:
+ read0 = BRW_WRITEMASK_XYZ;
+ read1 = BRW_WRITEMASK_XYZ;
+ break;
+
+ case TGSI_OPCODE_DPH:
+ read0 = BRW_WRITEMASK_XYZ;
+ read1 = BRW_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_DP4:
+ read0 = BRW_WRITEMASK_XYZW;
+ read1 = BRW_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_LIT:
+ read0 = BRW_WRITEMASK_XYW;
+ break;
+
+ case TGSI_OPCODE_DST:
+ case WM_FRONTFACING:
+ case TGSI_OPCODE_KILP:
+ default:
+ break;
+ }
+
+ track_arg(c, inst, 0, read0);
+ track_arg(c, inst, 1, read1);
+ track_arg(c, inst, 2, read2);
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass1");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
new file mode 100644
index 00000000000..19248b45195
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -0,0 +1,334 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "brw_debug.h"
+#include "brw_wm.h"
+
+
+/* Use these to force spilling so that that functionality can be
+ * tested with known-good examples rather than having to construct new
+ * tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0
+#define TEST_DST_SPILLS 0
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+ struct brw_wm_value *value,
+ GLuint reg)
+{
+ if (value->lastuse) {
+ /* Set nextuse to zero, it will be corrected by
+ * update_register_usage().
+ */
+ c->pass2_grf[reg].value = value;
+ c->pass2_grf[reg].nextuse = 0;
+
+ value->resident = &c->pass2_grf[reg];
+ value->hw_reg = brw_vec8_grf(reg*2, 0);
+
+ if (TEST_PAYLOAD_SPILLS)
+ spill_value(c, value);
+ }
+}
+
+
+/* Initialize all the register values. Do the initial setup
+ * calculations for interpolants.
+ */
+static void init_registers( struct brw_wm_compile *c )
+{
+ GLuint reg = 0;
+ GLuint j;
+
+ for (j = 0; j < c->grf_limit; j++)
+ c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
+
+ /* Pre-allocate incoming payload regs:
+ */
+ for (j = 0; j < c->key.nr_depth_regs; j++)
+ prealloc_reg(c, &c->payload.depth[j], reg++);
+
+ for (j = 0; j < c->nr_creg; j++)
+ prealloc_reg(c, &c->creg[j], reg++);
+
+ reg++; /* XXX: skip over position output */
+
+ /* XXX: currently just hope the VS outputs line up with FS inputs:
+ */
+ for (j = 0; j < c->key.nr_inputs; j++)
+ prealloc_reg(c, &c->payload.input_interp[j], reg++);
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2;
+ c->prog_data.curb_read_length = c->nr_creg * 2;
+
+ /* Note this allocation:
+ */
+ c->max_wm_grf = reg * 2;
+}
+
+
+/* Update the nextuse value for each register in our file.
+ */
+static void update_register_usage(struct brw_wm_compile *c,
+ GLuint thisinsn)
+{
+ GLuint i;
+
+ for (i = 1; i < c->grf_limit; i++) {
+ struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+ /* Only search those which can change:
+ */
+ if (grf->nextuse < thisinsn) {
+ const struct brw_wm_ref *ref = grf->value->lastuse;
+
+ /* Has last use of value been passed?
+ */
+ if (ref->insn < thisinsn) {
+ grf->value->resident = 0;
+ grf->value = 0;
+ grf->nextuse = BRW_WM_MAX_INSN;
+ }
+ else {
+ /* Else loop through chain to update:
+ */
+ while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+ ref = ref->prevuse;
+
+ grf->nextuse = ref->insn;
+ }
+ }
+ }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value)
+{
+ /* Allocate a spill slot. Note that allocations start from 0x40 -
+ * the first slot is reserved to mean "undef" in brw_wm_emit.c
+ */
+ if (!value->spill_slot) {
+ c->last_scratch += 0x40;
+ value->spill_slot = c->last_scratch;
+ }
+
+ /* The spill will be done in brw_wm_emit.c immediately after the
+ * value is calculated, so we can just take this reg without any
+ * further work.
+ */
+ value->resident->value = NULL;
+ value->resident->nextuse = BRW_WM_MAX_INSN;
+ value->resident = NULL;
+}
+
+
+
+/* Search for contiguous region with the most distant nearest
+ * member. Free regs count as very distant.
+ *
+ * TODO: implement spill-to-reg so that we can rearrange discontigous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+ GLuint nr,
+ GLuint thisinsn)
+{
+ struct brw_wm_grf *grf = c->pass2_grf;
+ GLuint furthest = 0;
+ GLuint reg = 0;
+ GLuint i, j;
+
+ /* Start search at 1: r0 is special and can't be used or spilled.
+ */
+ for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+ GLuint group_nextuse = BRW_WM_MAX_INSN;
+
+ for (j = 0; j < nr; j++) {
+ if (grf[i+j].nextuse < group_nextuse)
+ group_nextuse = grf[i+j].nextuse;
+ }
+
+ if (group_nextuse > furthest) {
+ furthest = group_nextuse;
+ reg = i;
+ }
+ }
+
+ assert(furthest != thisinsn);
+
+ /* Any non-empty regs will need to be spilled:
+ */
+ for (j = 0; j < nr; j++)
+ if (grf[reg+j].value)
+ spill_value(c, grf[reg+j].value);
+
+ return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c,
+ struct brw_wm_value *dst[],
+ GLuint nr,
+ GLuint thisinsn)
+{
+ GLuint reg = search_contiguous_regs(c, nr, thisinsn);
+ GLuint i;
+
+ for (i = 0; i < nr; i++) {
+ if (!dst[i]) {
+ /* Need to grab a dummy value in TEX case. Don't introduce
+ * it into the tracking scheme.
+ */
+ dst[i] = &c->vreg[c->nr_vreg++];
+ }
+ else {
+ assert(!dst[i]->resident);
+ assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+ c->pass2_grf[reg+i].value = dst[i];
+ c->pass2_grf[reg+i].nextuse = thisinsn;
+
+ dst[i]->resident = &c->pass2_grf[reg+i];
+ }
+
+ dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
+ }
+
+ if ((reg+nr)*2 > c->max_wm_grf)
+ c->max_wm_grf = (reg+nr) * 2;
+}
+
+
+static void load_args(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ GLuint thisinsn = inst - c->instruction;
+ GLuint i,j;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 4; j++) {
+ struct brw_wm_ref *ref = inst->src[i][j];
+
+ if (ref) {
+ if (!ref->value->resident) {
+ /* Need to bring the value in from scratch space. The code for
+ * this will be done in brw_wm_emit.c, here we just do the
+ * register allocation and mark the ref as requiring a fill.
+ */
+ GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+
+ c->pass2_grf[reg].value = ref->value;
+ c->pass2_grf[reg].nextuse = thisinsn;
+
+ ref->value->resident = &c->pass2_grf[reg];
+
+ /* Note that a fill is required:
+ */
+ ref->unspill_reg = reg*2;
+ }
+
+ /* Adjust the hw_reg to point at the value's current location:
+ */
+ assert(ref->value == ref->value->resident->value);
+ ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
+ }
+ }
+ }
+}
+
+
+
+/* Step 3: Work forwards once again. Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers. Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+ GLuint insn;
+ GLuint i;
+
+ init_registers(c);
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+
+ /* Update registers' nextuse values:
+ */
+ update_register_usage(c, insn);
+
+ /* May need to unspill some args.
+ */
+ load_args(c, inst);
+
+ /* Allocate registers to hold results:
+ */
+ switch (inst->opcode) {
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ alloc_contiguous_dest(c, inst->dst, 4, insn);
+ break;
+
+ default:
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ assert(inst->dst[i]);
+ alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
+ }
+ }
+ break;
+ }
+
+ if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ spill_value(c, inst->dst[i]);
+ }
+ }
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2");
+ }
+
+ c->state = PASS2_DONE;
+
+ if (BRW_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2/done");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
new file mode 100644
index 00000000000..a8bc31c9cef
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -0,0 +1,229 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+#include "util/u_format.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+static enum pipe_error
+upload_default_color( struct brw_context *brw,
+ const GLfloat *color,
+ struct brw_winsys_buffer **bo_out )
+{
+ struct brw_sampler_default_color sdc;
+ enum pipe_error ret;
+
+ COPY_4V(sdc.color, color);
+
+ ret = brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+ NULL, 0, bo_out );
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+struct wm_sampler_key {
+ int sampler_count;
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+};
+
+
+/** Sets up the cache key for sampler state for all texture units */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+ struct wm_sampler_key *key)
+{
+ int i;
+
+ memset(key, 0, sizeof(*key));
+
+ key->sampler_count = MIN2(brw->curr.num_textures,
+ brw->curr.num_samplers);
+
+ for (i = 0; i < key->sampler_count; i++) {
+ const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
+ const struct brw_sampler *sampler = brw->curr.sampler[i];
+ struct brw_sampler_state *entry = &key->sampler[i];
+
+ entry->ss0 = sampler->ss0;
+ entry->ss1 = sampler->ss1;
+ entry->ss2.default_color_pointer = 0; /* reloc */
+ entry->ss3 = sampler->ss3;
+
+ /* Cube-maps on 965 and later must use the same wrap mode for all 3
+ * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
+ */
+ if (tex->base.target == PIPE_TEXTURE_CUBE) {
+ if (FALSE &&
+ (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST ||
+ sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)) {
+ entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ } else {
+ entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
+ } else if (tex->base.target == PIPE_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
+ }
+}
+
+
+static enum pipe_error
+brw_wm_sampler_update_default_colors(struct brw_context *brw)
+{
+ enum pipe_error ret;
+ int nr = MIN2(brw->curr.num_textures,
+ brw->curr.num_samplers);
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
+ const struct brw_sampler *sampler = brw->curr.sampler[i];
+ const float *bc;
+
+ if (util_format_is_depth_or_stencil(tex->base.format)) {
+ float bordercolor[4] = {
+ sampler->border_color[0],
+ sampler->border_color[0],
+ sampler->border_color[0],
+ sampler->border_color[0]
+ };
+
+ bc = bordercolor;
+ }
+ else {
+ bc = sampler->border_color;
+ }
+
+ /* GL specs that border color for depth textures is taken from the
+ * R channel, while the hardware uses A. Spam R into all the
+ * channels for safety.
+ */
+ ret = upload_default_color(brw,
+ bc,
+ &brw->wm.sdc_bo[i]);
+ if (ret)
+ return ret;
+ }
+
+ return PIPE_OK;
+}
+
+
+
+/* All samplers must be uploaded in a single contiguous array.
+ */
+static int upload_wm_samplers( struct brw_context *brw )
+{
+ struct wm_sampler_key key;
+ struct brw_winsys_reloc reloc[BRW_MAX_TEX_UNIT];
+ enum pipe_error ret;
+ int i;
+
+ brw_wm_sampler_update_default_colors(brw);
+ brw_wm_sampler_populate_key(brw, &key);
+
+ if (brw->wm.sampler_count != key.sampler_count) {
+ brw->wm.sampler_count = key.sampler_count;
+ brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ }
+
+ if (brw->wm.sampler_count == 0) {
+ bo_reference(&brw->wm.sampler_bo, NULL);
+ return PIPE_OK;
+ }
+
+ /* Emit SDC relocations */
+ for (i = 0; i < key.sampler_count; i++) {
+ make_reloc( &reloc[i],
+ BRW_USAGE_SAMPLER,
+ 0,
+ i * sizeof(struct brw_sampler_state) +
+ offsetof(struct brw_sampler_state, ss2),
+ brw->wm.sdc_bo[i]);
+ }
+
+
+ if (brw_search_cache(&brw->cache, BRW_SAMPLER,
+ &key, sizeof(key),
+ reloc, key.sampler_count,
+ NULL,
+ &brw->wm.sampler_bo))
+ return PIPE_OK;
+
+ /* If we didnt find it in the cache, compute the state and put it in the
+ * cache.
+ */
+ ret = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+ &key, sizeof(key),
+ reloc, key.sampler_count,
+ &key.sampler, sizeof(key.sampler),
+ NULL, NULL,
+ &brw->wm.sampler_bo);
+ if (ret)
+ return ret;
+
+
+ return 0;
+}
+
+const struct brw_tracked_state brw_wm_samplers = {
+ .dirty = {
+ .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLERS,
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = upload_wm_samplers,
+};
+
+
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
new file mode 100644
index 00000000000..ee970ac75bc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -0,0 +1,339 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_math.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+struct brw_wm_unit_key {
+ unsigned int total_grf, total_scratch;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int dispatch_grf_start_reg;
+
+ unsigned int curbe_offset;
+ unsigned int urb_size;
+
+ unsigned int max_threads;
+
+ unsigned int nr_surfaces, sampler_count;
+ GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
+ GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+ GLfloat offset_units, offset_factor;
+};
+
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+{
+ const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+
+ memset(key, 0, sizeof(*key));
+
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
+ key->max_threads = 1;
+ else {
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (BRW_IS_IGDNG(brw))
+ key->max_threads = 12 * 6;
+ else if (BRW_IS_G4X(brw))
+ key->max_threads = 10 * 5;
+ else
+ key->max_threads = 8 * 4;
+ }
+
+ /* CACHE_NEW_WM_PROG */
+ key->total_grf = brw->wm.prog_data->total_grf;
+ key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+ key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024);
+
+ /* BRW_NEW_URB_FENCE */
+ key->urb_size = brw->urb.vsize;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.wm_start;
+
+ /* BRW_NEW_NR_SURFACEs */
+ key->nr_surfaces = brw->wm.nr_surfaces;
+
+ /* CACHE_NEW_SAMPLER */
+ key->sampler_count = brw->wm.sampler_count;
+
+ /* PIPE_NEW_RAST */
+ key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable;
+
+ /* PIPE_NEW_FRAGMENT_PROGRAM */
+ key->uses_depth = fp->uses_depth;
+ key->computes_depth = fp->info.writes_z;
+
+ /* PIPE_NEW_DEPTH_BUFFER
+ *
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->curr.fb.zsbuf == NULL)
+ key->computes_depth = 0;
+
+ /* PIPE_NEW_DEPTH_STENCIL_ALPHA */
+ key->uses_kill = (fp->info.uses_kill ||
+ brw->curr.zstencil->cc3.alpha_test);
+
+ key->has_flow_control = fp->has_flow_control;
+
+ /* temporary sanity check assertion */
+ assert(fp->has_flow_control == 0);
+
+ /* PIPE_NEW_QUERY */
+ key->stats_wm = (brw->query.stats_wm != 0);
+
+ /* PIPE_NEW_RAST */
+ key->line_stipple = brw->curr.rast->templ.line_stipple_enable;
+
+
+ key->offset_enable = (brw->curr.rast->templ.offset_cw ||
+ brw->curr.rast->templ.offset_ccw);
+
+ key->offset_units = brw->curr.rast->templ.offset_units;
+ key->offset_factor = brw->curr.rast->templ.offset_scale;
+}
+
+/**
+ * Setup wm hardware state. See page 225 of Volume 2
+ */
+static enum pipe_error
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+ struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_wm_unit_state wm;
+ enum pipe_error ret;
+
+ memset(&wm, 0, sizeof(wm));
+
+ wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ wm.thread0.kernel_start_pointer = 0; /* reloc */
+ wm.thread1.depth_coef_urb_read_offset = 1;
+ wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (key->total_scratch != 0) {
+ wm.thread2.scratch_space_base_pointer = 0; /* reloc */
+ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+ } else {
+ wm.thread2.scratch_space_base_pointer = 0;
+ wm.thread2.per_thread_scratch_space = 0;
+ }
+
+ wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+ wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ wm.thread3.urb_entry_read_offset = 0;
+ wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.wm4.sampler_count = 0; /* hardware requirement */
+ else
+ wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
+ /* reloc */
+ wm.wm4.sampler_state_pointer = 0;
+
+ wm.wm5.program_uses_depth = key->uses_depth;
+ wm.wm5.program_computes_depth = key->computes_depth;
+ wm.wm5.program_uses_killpixel = key->uses_kill;
+
+ if (key->has_flow_control)
+ wm.wm5.enable_8_pix = 1;
+ else
+ wm.wm5.enable_16_pix = 1;
+
+ wm.wm5.max_threads = key->max_threads - 1;
+ wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+ wm.wm5.legacy_line_rast = 0;
+ wm.wm5.legacy_global_depth_bias = 0;
+ wm.wm5.early_depth_test = 1; /* never need to disable */
+ wm.wm5.line_aa_region_width = 0;
+ wm.wm5.line_endcap_aa_region_width = 1;
+
+ wm.wm5.polygon_stipple = key->polygon_stipple;
+
+ if (key->offset_enable) {
+ wm.wm5.depth_offset = 1;
+ /* Something wierd going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewere (eg. the
+ * quad-offset-units test).
+ */
+ wm.global_depth_offset_constant = key->offset_units * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.global_depth_offset_scale = key->offset_factor;
+ }
+
+ wm.wm5.line_stipple = key->line_stipple;
+
+ if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm)
+ wm.wm4.stats_enable = 1;
+
+ ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+ key, sizeof(*key),
+ reloc, nr_reloc,
+ &wm, sizeof(wm),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+static enum pipe_error upload_wm_unit( struct brw_context *brw )
+{
+ struct brw_wm_unit_key key;
+ struct brw_winsys_reloc reloc[3];
+ unsigned nr_reloc = 0;
+ enum pipe_error ret;
+ unsigned grf_reg_count;
+ unsigned per_thread_scratch_space;
+ unsigned stats_enable;
+ unsigned sampler_count;
+
+ wm_unit_populate_key(brw, &key);
+
+
+ /* Allocate the necessary scratch space if we haven't already. Don't
+ * bother reducing the allocation later, since we use scratch so
+ * rarely.
+ */
+ assert(key.total_scratch <= 12 * 1024);
+ if (key.total_scratch) {
+ GLuint total = key.total_scratch * key.max_threads;
+
+ /* Do we need a new buffer:
+ */
+ if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size)
+ bo_reference(&brw->wm.scratch_bo, NULL);
+
+ if (brw->wm.scratch_bo == NULL) {
+ ret = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_SHADER_SCRATCH,
+ total,
+ 4096,
+ &brw->wm.scratch_bo);
+ if (ret)
+ return ret;
+ }
+ }
+
+
+ /* XXX: temporary:
+ */
+ grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+ per_thread_scratch_space = key.total_scratch / 1024 - 1;
+ stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
+ sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+
+ /* Emit WM program relocation */
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo);
+
+ /* Emit scratch space relocation */
+ if (key.total_scratch != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_SCRATCH,
+ per_thread_scratch_space,
+ offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_bo);
+ }
+
+ /* Emit sampler state relocation */
+ if (key.sampler_count != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ stats_enable | (sampler_count << 2),
+ offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo);
+ }
+
+
+ if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
+ &key, sizeof(key),
+ reloc, nr_reloc,
+ NULL,
+ &brw->wm.state_bo))
+ return PIPE_OK;
+
+ ret = wm_unit_create_from_key(brw, &key,
+ reloc, nr_reloc,
+ &brw->wm.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_unit = {
+ .dirty = {
+ .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+ PIPE_NEW_DEPTH_BUFFER |
+ PIPE_NEW_RAST |
+ PIPE_NEW_DEPTH_STENCIL_ALPHA |
+ PIPE_NEW_QUERY),
+
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_WM_SURFACES),
+
+ .cache = (CACHE_NEW_WM_PROG |
+ CACHE_NEW_SAMPLER)
+ },
+ .prepare = upload_wm_unit,
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
new file mode 100644
index 00000000000..f92b8198ed3
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -0,0 +1,294 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_format.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_screen.h"
+
+
+
+
+static enum pipe_error
+brw_update_texture_surface( struct brw_context *brw,
+ struct brw_texture *tex,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_winsys_reloc reloc[1];
+ enum pipe_error ret;
+
+ /* Emit relocation to surface contents */
+ make_reloc(&reloc[0],
+ BRW_USAGE_SAMPLER,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ tex->bo);
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &tex->ss, sizeof tex->ss,
+ reloc, Elements(reloc),
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+ &tex->ss, sizeof tex->ss,
+ reloc, Elements(reloc),
+ &tex->ss, sizeof tex->ss,
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+
+
+
+
+
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static enum pipe_error
+brw_update_render_surface(struct brw_context *brw,
+ struct brw_surface *surface,
+ struct brw_winsys_buffer **bo_out)
+{
+ struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0;
+ struct brw_surface_state ss;
+ struct brw_winsys_reloc reloc[1];
+ enum pipe_error ret;
+
+ /* XXX: we will only be rendering to this surface:
+ */
+ make_reloc(&reloc[0],
+ BRW_USAGE_RENDER_TARGET,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ surface->bo);
+
+ /* Surfaces are potentially shared between contexts, so can't
+ * scribble the in-place ss0 value in the surface.
+ */
+ memcpy(&ss, &surface->ss, sizeof ss);
+
+ ss.ss0.color_blend = blend_ss0.color_blend;
+ ss.ss0.writedisable_blue = blend_ss0.writedisable_blue;
+ ss.ss0.writedisable_green = blend_ss0.writedisable_green;
+ ss.ss0.writedisable_red = blend_ss0.writedisable_red;
+ ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha;
+
+ if (brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &ss, sizeof(ss),
+ reloc, Elements(reloc),
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ ret = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &ss, sizeof ss,
+ reloc, Elements(reloc),
+ &ss, sizeof ss,
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static enum pipe_error
+brw_wm_get_binding_table(struct brw_context *brw,
+ struct brw_winsys_buffer **bo_out )
+{
+ enum pipe_error ret;
+ struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF];
+ uint32_t data[BRW_WM_MAX_SURF];
+ GLuint nr_relocs = 0;
+ GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
+ int i;
+
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+ assert(brw->wm.nr_surfaces > 0);
+
+ /* Emit binding table relocations to surface state
+ */
+ for (i = 0; i < brw->wm.nr_surfaces; i++) {
+ if (brw->wm.surf_bo[i]) {
+ make_reloc(&reloc[nr_relocs++],
+ BRW_USAGE_STATE,
+ 0,
+ i * sizeof(GLuint),
+ brw->wm.surf_bo[i]);
+ }
+ }
+
+ /* Note there is no key for this search beyond the values in the
+ * relocation array:
+ */
+ if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ reloc, nr_relocs,
+ NULL,
+ bo_out))
+ return PIPE_OK;
+
+ /* Upload zero data, will all be overwitten with relocation
+ * offsets:
+ */
+ for (i = 0; i < brw->wm.nr_surfaces; i++)
+ data[i] = 0;
+
+ ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ reloc, nr_relocs,
+ data, data_size,
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+static enum pipe_error prepare_wm_surfaces(struct brw_context *brw )
+{
+ enum pipe_error ret;
+ int nr_surfaces = 0;
+ GLuint i;
+
+ /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND
+ *
+ * Update surfaces for drawing buffers. Mixes in colormask and
+ * blend state.
+ *
+ * XXX: no color buffer case
+ */
+ for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+ ret = brw_update_render_surface(brw,
+ brw_surface(brw->curr.fb.cbufs[i]),
+ &brw->wm.surf_bo[BTI_COLOR_BUF(i)]);
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_COLOR_BUF(i) + 1;
+ }
+
+
+
+ /* PIPE_NEW_FRAGMENT_CONSTANTS
+ */
+#if 0
+ if (brw->curr.fragment_constants) {
+ ret = brw_update_fragment_constant_surface(
+ brw,
+ brw->curr.fragment_constants,
+ &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]);
+
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1;
+ }
+ else {
+ bo_reference(&brw->wm.surf_bo[SURF_FRAG_CONSTANTS], NULL);
+ }
+#endif
+
+
+ /* PIPE_NEW_TEXTURE
+ */
+ for (i = 0; i < brw->curr.num_textures; i++) {
+ ret = brw_update_texture_surface(brw,
+ brw_texture(brw->curr.texture[i]),
+ &brw->wm.surf_bo[BTI_TEXTURE(i)]);
+ if (ret)
+ return ret;
+
+ nr_surfaces = BTI_TEXTURE(i) + 1;
+ }
+
+ /* Clear any inactive entries:
+ */
+ for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++)
+ bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL);
+
+ if (!brw->curr.fragment_constants)
+ bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL);
+
+ /* XXX: no pipe_max_textures define?? */
+ for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++)
+ bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL);
+
+ if (brw->wm.nr_surfaces != nr_surfaces) {
+ brw->wm.nr_surfaces = nr_surfaces;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+ }
+
+ ret = brw_wm_get_binding_table(brw, &brw->wm.bind_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
+}
+
+const struct brw_tracked_state brw_wm_surfaces = {
+ .dirty = {
+ .mesa = (PIPE_NEW_COLOR_BUFFERS |
+ PIPE_NEW_BOUND_TEXTURES |
+ PIPE_NEW_FRAGMENT_CONSTANTS |
+ PIPE_NEW_BLEND),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
+ .cache = 0
+ },
+ .prepare = prepare_wm_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
new file mode 100644
index 00000000000..3166958bad3
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -0,0 +1,1790 @@
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_decode.c
+ * This file contains code to print out batchbuffer contents in a
+ * human-readable format.
+ *
+ * The current version only supports i915 packets, and only pretty-prints a
+ * subset of them. The intention is for it to make just a best attempt to
+ * decode, but never crash in the process.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "intel_decode.h"
+
+/*#include "intel_chipset.h"*/
+#define IS_965(x) 1 /* XXX */
+#define IS_9XX(x) 1 /* XXX */
+
+#define BUFFER_FAIL(_count, _len, _name) do { \
+ fprintf(out, "Buffer size too small in %s (%d < %d)\n", \
+ (_name), (_count), (_len)); \
+ (*failures)++; \
+ return count; \
+} while (0)
+
+static FILE *out;
+static uint32_t saved_s2 = 0, saved_s4 = 0;
+static char saved_s2_set = 0, saved_s4_set = 0;
+
+static float
+int_as_float(uint32_t intval)
+{
+ union intfloat {
+ uint32_t i;
+ float f;
+ } uval;
+
+ uval.i = intval;
+ return uval.f;
+}
+
+static void
+instr_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *fmt, ...)
+{
+ va_list va;
+
+ fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index],
+ index == 0 ? "" : " ");
+ va_start(va, fmt);
+ vfprintf(out, fmt, va);
+ va_end(va);
+}
+
+
+static int
+decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode;
+
+ struct {
+ uint32_t opcode;
+ int len_mask;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_mi[] = {
+ { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
+ { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
+ { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+ { 0x04, 0, 1, 1, "MI_FLUSH" },
+ { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+ { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+ { 0x00, 0, 1, 1, "MI_NOOP" },
+ { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
+ { 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
+ { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
+ { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
+ { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
+ { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
+ { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
+ { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
+ };
+
+
+ for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
+ opcode++) {
+ if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
+ unsigned int len = 1, i;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name);
+ if (opcodes_mi[opcode].max_len > 1) {
+ len = (data[0] & opcodes_mi[opcode].len_mask) + 2;
+ if (len < opcodes_mi[opcode].min_len ||
+ len > opcodes_mi[opcode].max_len)
+ {
+ fprintf(out, "Bad length (%d) in %s, [%d, %d]\n",
+ len, opcodes_mi[opcode].name,
+ opcodes_mi[opcode].min_len,
+ opcodes_mi[opcode].max_len);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_mi[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "MI UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+static int
+decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode, len;
+ char *format = NULL;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_2d[] = {
+ { 0x40, 5, 5, "COLOR_BLT" },
+ { 0x43, 6, 6, "SRC_COPY_BLT" },
+ { 0x01, 8, 8, "XY_SETUP_BLT" },
+ { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+ { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+ { 0x24, 2, 2, "XY_PIXEL_BLT" },
+ { 0x25, 3, 3, "XY_SCANLINES_BLT" },
+ { 0x26, 4, 4, "Y_TEXT_BLT" },
+ { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+ { 0x50, 6, 6, "XY_COLOR_BLT" },
+ { 0x51, 6, 6, "XY_PAT_BLT" },
+ { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+ { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+ { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+ { 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+ { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+ { 0x53, 8, 8, "XY_SRC_COPY_BLT" },
+ { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+ { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+ { 0x55, 9, 9, "XY_FULL_BLT" },
+ { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
+ { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+ { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+ { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+ { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+ };
+
+ switch ((data[0] & 0x1fc00000) >> 22) {
+ case 0x50:
+ instr_out(data, hw_offset, 0,
+ "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
+ (data[0] & (1 << 20)) ? "en" : "dis",
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 11) & 1);
+
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 6)
+ fprintf(out, "Bad count in XY_COLOR_BLT\n");
+ if (count < 6)
+ BUFFER_FAIL(count, len, "XY_COLOR_BLT");
+
+ switch ((data[1] >> 24) & 0x3) {
+ case 0:
+ format="8";
+ break;
+ case 1:
+ format="565";
+ break;
+ case 2:
+ format="1555";
+ break;
+ case 3:
+ format="8888";
+ break;
+ }
+
+ instr_out(data, hw_offset, 1, "format %s, pitch %d, "
+ "clipping %sabled\n", format,
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
+ instr_out(data, hw_offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ instr_out(data, hw_offset, 3, "(%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
+ instr_out(data, hw_offset, 5, "color\n");
+ return len;
+ case 0x53:
+ instr_out(data, hw_offset, 0,
+ "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+ "src tile %d, dst tile %d)\n",
+ (data[0] & (1 << 20)) ? "en" : "dis",
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 15) & 1,
+ (data[0] >> 11) & 1);
+
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 8)
+ fprintf(out, "Bad count in XY_SRC_COPY_BLT\n");
+ if (count < 8)
+ BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT");
+
+ switch ((data[1] >> 24) & 0x3) {
+ case 0:
+ format="8";
+ break;
+ case 1:
+ format="565";
+ break;
+ case 2:
+ format="1555";
+ break;
+ case 3:
+ format="8888";
+ break;
+ }
+
+ instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
+ "clipping %sabled\n", format,
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
+ instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
+ instr_out(data, hw_offset, 5, "src (%d,%d)\n",
+ data[5] & 0xffff, data[5] >> 16);
+ instr_out(data, hw_offset, 6, "src pitch %d\n",
+ (short)(data[6] & 0xffff));
+ instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
+ return len;
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
+ opcode++) {
+ if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) {
+ unsigned int i;
+
+ len = 1;
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name);
+ if (opcodes_2d[opcode].max_len > 1) {
+ len = (data[0] & 0x000000ff) + 2;
+ if (len < opcodes_2d[opcode].min_len ||
+ len > opcodes_2d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_2d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "2D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+static int
+decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ switch ((data[0] & 0x00f80000) >> 19) {
+ case 0x11:
+ instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n");
+ return 1;
+ case 0x10:
+ instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
+ return 1;
+ case 0x01:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+ return 1;
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+ return 1;
+ case 0x05:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+ return 1;
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+/** Sets the string dstname to describe the destination of the PS instruction */
+static void
+i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask)
+{
+ uint32_t a0 = data[i];
+ int dst_nr = (a0 >> 14) & 0xf;
+ char dstmask[8];
+ char *sat;
+
+ if (do_mask) {
+ if (((a0 >> 10) & 0xf) == 0xf) {
+ dstmask[0] = 0;
+ } else {
+ int dstmask_index = 0;
+
+ dstmask[dstmask_index++] = '.';
+ if (a0 & (1 << 10))
+ dstmask[dstmask_index++] = 'x';
+ if (a0 & (1 << 11))
+ dstmask[dstmask_index++] = 'y';
+ if (a0 & (1 << 12))
+ dstmask[dstmask_index++] = 'z';
+ if (a0 & (1 << 13))
+ dstmask[dstmask_index++] = 'w';
+ dstmask[dstmask_index++] = 0;
+ }
+
+ if (a0 & (1 << 22))
+ sat = ".sat";
+ else
+ sat = "";
+ } else {
+ dstmask[0] = 0;
+ sat = "";
+ }
+
+ switch ((a0 >> 19) & 0x7) {
+ case 0:
+ if (dst_nr > 15)
+ fprintf(out, "bad destination reg R%d\n", dst_nr);
+ sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+ break;
+ case 4:
+ if (dst_nr > 0)
+ fprintf(out, "bad destination reg oC%d\n", dst_nr);
+ sprintf(dstname, "oC%s%s", dstmask, sat);
+ break;
+ case 5:
+ if (dst_nr > 0)
+ fprintf(out, "bad destination reg oD%d\n", dst_nr);
+ sprintf(dstname, "oD%s%s", dstmask, sat);
+ break;
+ case 6:
+ if (dst_nr > 2)
+ fprintf(out, "bad destination reg U%d\n", dst_nr);
+ sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+ break;
+ default:
+ sprintf(dstname, "RESERVED");
+ break;
+ }
+}
+
+static char *
+i915_get_channel_swizzle(uint32_t select)
+{
+ switch (select & 0x7) {
+ case 0:
+ return (select & 8) ? "-x" : "x";
+ case 1:
+ return (select & 8) ? "-y" : "y";
+ case 2:
+ return (select & 8) ? "-z" : "z";
+ case 3:
+ return (select & 8) ? "-w" : "w";
+ case 4:
+ return (select & 8) ? "-0" : "0";
+ case 5:
+ return (select & 8) ? "-1" : "1";
+ default:
+ return (select & 8) ? "-bad" : "bad";
+ }
+}
+
+static void
+i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ if (src_nr > 15)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ fprintf(out, "bad src reg T%d\n", src_nr);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 2:
+ sprintf(name, "C%d", src_nr);
+ if (src_nr > 31)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 4:
+ sprintf(name, "oC");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oC%d\n", src_nr);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oD%d\n", src_nr);
+ break;
+ case 6:
+ sprintf(name, "U%d", src_nr);
+ if (src_nr > 2)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ default:
+ fprintf(out, "bad src reg type %d\n", src_type);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+i915_get_instruction_src0(const uint32_t *data, int i, char *srcname)
+{
+ uint32_t a0 = data[i];
+ uint32_t a1 = data[i + 1];
+ int src_nr = (a0 >> 2) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src1(const uint32_t *data, int i, char *srcname)
+{
+ uint32_t a1 = data[i + 1];
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a1 >> 8) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src2(const uint32_t *data, int i, char *srcname)
+{
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a2 >> 16) & 0x1f;
+ char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf);
+ char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf);
+ char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf);
+ char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf);
+ char swizzle[100];
+
+ i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ if (src_nr > 15)
+ fprintf(out, "bad src reg %s\n", name);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ fprintf(out, "bad src reg T%d\n", src_nr);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 4:
+ sprintf(name, "oC");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oC%d\n", src_nr);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ if (src_nr > 0)
+ fprintf(out, "bad src reg oD%d\n", src_nr);
+ break;
+ default:
+ fprintf(out, "bad src reg type %d\n", src_type);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+i915_decode_alu1(const uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s\n", instr_prefix,
+ op_name, dst, src0);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu2(const uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+ i915_get_instruction_src1(data, i, src1);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu3(const uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100], src2[100];
+
+ i915_get_instruction_dst(data, i, dst, 1);
+ i915_get_instruction_src0(data, i, src0);
+ i915_get_instruction_src1(data, i, src1);
+ i915_get_instruction_src2(data, i, src2);
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1, src2);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_tex(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+ char *tex_name)
+{
+ uint32_t t0 = data[i];
+ uint32_t t1 = data[i + 1];
+ char dst_name[100];
+ char addr_name[100];
+ int sampler_nr;
+
+ i915_get_instruction_dst(data, i, dst_name, 0);
+ i915_get_instruction_addr((t1 >> 24) & 0x7,
+ (t1 >> 17) & 0xf,
+ addr_name);
+ sampler_nr = t0 & 0xf;
+
+ instr_out(data, hw_offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
+ tex_name, dst_name, sampler_nr, addr_name);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+{
+ uint32_t d0 = data[i];
+ char *sampletype;
+ int dcl_nr = (d0 >> 14) & 0xf;
+ char *dcl_x = d0 & (1 << 10) ? "x" : "";
+ char *dcl_y = d0 & (1 << 11) ? "y" : "";
+ char *dcl_z = d0 & (1 << 12) ? "z" : "";
+ char *dcl_w = d0 & (1 << 13) ? "w" : "";
+ char dcl_mask[10];
+
+ switch ((d0 >> 19) & 0x3) {
+ case 1:
+ sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+ if (strcmp(dcl_mask, ".") == 0)
+ fprintf(out, "bad (empty) dcl mask\n");
+
+ if (dcl_nr > 10)
+ fprintf(out, "bad T%d dcl register number\n", dcl_nr);
+ if (dcl_nr < 8) {
+ if (strcmp(dcl_mask, ".x") != 0 &&
+ strcmp(dcl_mask, ".xy") != 0 &&
+ strcmp(dcl_mask, ".xz") != 0 &&
+ strcmp(dcl_mask, ".w") != 0 &&
+ strcmp(dcl_mask, ".xyzw") != 0) {
+ fprintf(out, "bad T%d.%s dcl mask\n", dcl_nr, dcl_mask);
+ }
+ instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+ dcl_nr, dcl_mask);
+ } else {
+ if (strcmp(dcl_mask, ".xz") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+ else if (strcmp(dcl_mask, ".xw") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+ else if (strcmp(dcl_mask, ".xzw") == 0)
+ fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+
+ if (dcl_nr == 8) {
+ instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 9) {
+ instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 10) {
+ instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+ dcl_mask);
+ }
+ }
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ case 3:
+ switch ((d0 >> 22) & 0x3) {
+ case 0:
+ sampletype = "2D";
+ break;
+ case 1:
+ sampletype = "CUBE";
+ break;
+ case 2:
+ sampletype = "3D";
+ break;
+ default:
+ sampletype = "RESERVED";
+ break;
+ }
+ if (dcl_nr > 15)
+ fprintf(out, "bad S%d dcl register number\n", dcl_nr);
+ instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+ dcl_nr, sampletype);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ default:
+ instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ }
+}
+
+static void
+i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
+ int i, char *instr_prefix)
+{
+ switch ((data[i] >> 24) & 0x1f) {
+ case 0x0:
+ instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ case 0x01:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD");
+ break;
+ case 0x02:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV");
+ break;
+ case 0x03:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL");
+ break;
+ case 0x04:
+ i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD");
+ break;
+ case 0x05:
+ i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD");
+ break;
+ case 0x06:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3");
+ break;
+ case 0x07:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4");
+ break;
+ case 0x08:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC");
+ break;
+ case 0x09:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP");
+ break;
+ case 0x0a:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ");
+ break;
+ case 0x0b:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP");
+ break;
+ case 0x0c:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG");
+ break;
+ case 0x0d:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "CMP");
+ break;
+ case 0x0e:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MIN");
+ break;
+ case 0x0f:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX");
+ break;
+ case 0x10:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR");
+ break;
+ case 0x11:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD");
+ break;
+ case 0x12:
+ i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC");
+ break;
+ case 0x13:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE");
+ break;
+ case 0x14:
+ i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT");
+ break;
+ case 0x15:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD");
+ break;
+ case 0x16:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP");
+ break;
+ case 0x17:
+ i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB");
+ break;
+ case 0x19:
+ i915_decode_dcl(data, hw_offset, i, instr_prefix);
+ break;
+ default:
+ instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+ break;
+ }
+}
+
+static int
+decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+{
+ unsigned int len, i, c, opcode, word, map, sampler, instr;
+ char *format;
+
+ struct {
+ uint32_t opcode;
+ int i830_only;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d_1d[] = {
+ { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+ { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
+ { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+ { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+ { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+ { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" },
+ { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+ { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+ { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+ { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+ { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
+ { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
+ { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+ { 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+ { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+ { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" },
+ { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" },
+ { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
+ { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
+ { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
+ };
+
+ switch ((data[0] & 0x00ff0000) >> 16) {
+ case 0x07:
+ /* This instruction is unusual. A 0 length means just 1 DWORD instead of
+ * 2. The 0 length is specified in one place to be unsupported, but
+ * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+ * to cause no harm at least.
+ */
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+ len = (data[0] & 0x000000ff) + 1;
+ i = 1;
+ if (data[0] & (0x01 << 8)) {
+ if (i + 2 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "SIS.0\n");
+ instr_out(data, hw_offset, i++, "SIS.1\n");
+ }
+ if (data[0] & (0x02 << 8)) {
+ if (i + 1 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "DIS.0\n");
+ }
+ if (data[0] & (0x04 << 8)) {
+ if (i + 2 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "SSB.0\n");
+ instr_out(data, hw_offset, i++, "SSB.1\n");
+ }
+ if (data[0] & (0x08 << 8)) {
+ if (i + 2 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "MSB.0\n");
+ instr_out(data, hw_offset, i++, "MSB.1\n");
+ }
+ if (data[0] & (0x10 << 8)) {
+ if (i + 2 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "PSP.0\n");
+ instr_out(data, hw_offset, i++, "PSP.1\n");
+ }
+ if (data[0] & (0x20 << 8)) {
+ if (i + 2 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+ instr_out(data, hw_offset, i++, "PSC.0\n");
+ instr_out(data, hw_offset, i++, "PSC.1\n");
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ (*failures)++;
+ return len;
+ }
+ return len;
+ case 0x04:
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 0; word <= 7; word++) {
+ if (data[0] & (1 << (4 + word))) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
+
+ /* save vertex state for decode */
+ if (word == 2) {
+ saved_s2_set = 1;
+ saved_s2 = data[i];
+ }
+ if (word == 4) {
+ saved_s4_set = 1;
+ saved_s4 = data[i];
+ }
+
+ instr_out(data, hw_offset, i++, "S%d\n", word);
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x00:
+ instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
+ len = (data[0] & 0x0000003f) + 2;
+ instr_out(data, hw_offset, 1, "mask\n");
+
+ i = 2;
+ for (map = 0; map <= 15; map++) {
+ if (data[1] & (1 << map)) {
+ if (i + 3 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+ instr_out(data, hw_offset, i++, "map %d MS2\n", map);
+ instr_out(data, hw_offset, i++, "map %d MS3\n", map);
+ instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+ (*failures)++;
+ return len;
+ }
+ return len;
+ case 0x06:
+ instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+ len = (data[0] & 0x000000ff) + 2;
+
+ i = 2;
+ for (c = 0; c <= 31; c++) {
+ if (data[1] & (1 << c)) {
+ if (i + 4 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+ instr_out(data, hw_offset, i, "C%d.X = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ instr_out(data, hw_offset, i, "C%d.Y = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ instr_out(data, hw_offset, i, "C%d.Z = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ instr_out(data, hw_offset, i, "C%d.W = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x05:
+ instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+ len = (data[0] & 0x000000ff) + 2;
+ if ((len - 1) % 3 != 0 || len > 370) {
+ fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n");
+ (*failures)++;
+ }
+ i = 1;
+ for (instr = 0; instr < (len - 1) / 3; instr++) {
+ char instr_prefix[10];
+
+ if (i + 3 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM");
+ sprintf(instr_prefix, "PS%03d", instr);
+ i915_decode_instruction(data, hw_offset, i, instr_prefix);
+ i += 3;
+ }
+ return len;
+ case 0x01:
+ if (i830)
+ break;
+ instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+ instr_out(data, hw_offset, 1, "mask\n");
+ len = (data[0] & 0x0000003f) + 2;
+ i = 2;
+ for (sampler = 0; sampler <= 15; sampler++) {
+ if (data[1] & (1 << sampler)) {
+ if (i + 3 >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+ instr_out(data, hw_offset, i++, "sampler %d SS2\n",
+ sampler);
+ instr_out(data, hw_offset, i++, "sampler %d SS3\n",
+ sampler);
+ instr_out(data, hw_offset, i++, "sampler %d SS4\n",
+ sampler);
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x85:
+ len = (data[0] & 0x0000000f) + 2;
+
+ if (len != 2)
+ fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n");
+ if (count < 2)
+ BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES");
+
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+ switch ((data[1] >> 8) & 0xf) {
+ case 0x0: format = "g8"; break;
+ case 0x1: format = "x1r5g5b5"; break;
+ case 0x2: format = "r5g6b5"; break;
+ case 0x3: format = "a8r8g8b8"; break;
+ case 0x4: format = "ycrcb_swapy"; break;
+ case 0x5: format = "ycrcb_normal"; break;
+ case 0x6: format = "ycrcb_swapuv"; break;
+ case 0x7: format = "ycrcb_swapuvy"; break;
+ case 0x8: format = "a4r4g4b4"; break;
+ case 0x9: format = "a1r5g5b5"; break;
+ case 0xa: format = "a2r10g10b10"; break;
+ default: format = "BAD"; break;
+ }
+ instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n",
+ format,
+ (data[1] & (1 << 31)) ? "en" : "dis");
+ return len;
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
+ opcode++)
+ {
+ if (opcodes_3d_1d[opcode].i830_only && !i830)
+ continue;
+
+ if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+ len = 1;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
+ if (opcodes_3d_1d[opcode].max_len > 1) {
+ len = (data[0] & 0x0000ffff) + 2;
+ if (len < opcodes_3d_1d[opcode].min_len ||
+ len > opcodes_3d_1d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n",
+ opcodes_3d_1d[opcode].name);
+ (*failures)++;
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+static int
+decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
+ int *failures)
+{
+ char immediate = (data[0] & (1 << 23)) == 0;
+ unsigned int len, i;
+ char *primtype;
+
+ switch ((data[0] >> 18) & 0xf) {
+ case 0x0: primtype = "TRILIST"; break;
+ case 0x1: primtype = "TRISTRIP"; break;
+ case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+ case 0x3: primtype = "TRIFAN"; break;
+ case 0x4: primtype = "POLYGON"; break;
+ case 0x5: primtype = "LINELIST"; break;
+ case 0x6: primtype = "LINESTRIP"; break;
+ case 0x7: primtype = "RECTLIST"; break;
+ case 0x8: primtype = "POINTLIST"; break;
+ case 0x9: primtype = "DIB"; break;
+ case 0xa: primtype = "CLEAR_RECT"; break;
+ default: primtype = "unknown"; break;
+ }
+
+ /* XXX: 3DPRIM_DIB not supported */
+ if (immediate) {
+ len = (data[0] & 0x0003ffff) + 2;
+ instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DPRIMITIVE inline");
+ if (!saved_s2_set || !saved_s4_set) {
+ fprintf(out, "unknown vertex format\n");
+ for (i = 1; i < len; i++) {
+ instr_out(data, hw_offset, i,
+ " vertex data (%f float)\n",
+ int_as_float(data[i]));
+ }
+ } else {
+ unsigned int vertex = 0;
+ for (i = 1; i < len;) {
+ unsigned int tc;
+
+#define VERTEX_OUT(fmt, ...) do { \
+ if (i < len) \
+ instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+ else \
+ fprintf(out, " missing data in V%d\n", vertex); \
+ i++; \
+} while (0)
+
+ VERTEX_OUT("X = %f", int_as_float(data[i]));
+ VERTEX_OUT("Y = %f", int_as_float(data[i]));
+ switch (saved_s4 >> 6 & 0x7) {
+ case 0x1:
+ VERTEX_OUT("Z = %f", int_as_float(data[i]));
+ break;
+ case 0x2:
+ VERTEX_OUT("Z = %f", int_as_float(data[i]));
+ VERTEX_OUT("W = %f", int_as_float(data[i]));
+ break;
+ case 0x3:
+ break;
+ case 0x4:
+ VERTEX_OUT("W = %f", int_as_float(data[i]));
+ break;
+ default:
+ fprintf(out, "bad S4 position mask\n");
+ }
+
+ if (saved_s4 & (1 << 10)) {
+ VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+ "B=0x%02x)",
+ data[i] >> 24,
+ (data[i] >> 16) & 0xff,
+ (data[i] >> 8) & 0xff,
+ data[i] & 0xff);
+ }
+ if (saved_s4 & (1 << 11)) {
+ VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+ "B=0x%02x)",
+ data[i] >> 24,
+ (data[i] >> 16) & 0xff,
+ (data[i] >> 8) & 0xff,
+ data[i] & 0xff);
+ }
+ if (saved_s4 & (1 << 12))
+ VERTEX_OUT("width = 0x%08x)", data[i]);
+
+ for (tc = 0; tc <= 7; tc++) {
+ switch ((saved_s2 >> (tc * 4)) & 0xf) {
+ case 0x0:
+ VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+ break;
+ case 0x1:
+ VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+ break;
+ case 0x2:
+ VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+ VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+ break;
+ case 0x3:
+ VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+ break;
+ case 0x4:
+ VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+ break;
+ case 0x5:
+ VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+ VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+ break;
+ case 0xf:
+ break;
+ default:
+ fprintf(out, "bad S2.T%d format\n", tc);
+ }
+ }
+ vertex++;
+ }
+ }
+ } else {
+ /* indirect vertices */
+ len = data[0] & 0x0000ffff; /* index count */
+ if (data[0] & (1 << 17)) {
+ /* random vertex access */
+ if (count < (len + 1) / 2 + 1) {
+ BUFFER_FAIL(count, (len + 1) / 2 + 1,
+ "3DPRIMITIVE random indirect");
+ }
+ instr_out(data, hw_offset, 0,
+ "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+ if (len == 0) {
+ /* vertex indices continue until 0xffff is found */
+ for (i = 1; i < count; i++) {
+ if ((data[i] & 0xffff) == 0xffff) {
+ instr_out(data, hw_offset, i,
+ " indices: (terminator)\n");
+ return i;
+ } else if ((data[i] >> 16) == 0xffff) {
+ instr_out(data, hw_offset, i,
+ " indices: 0x%04x, "
+ "(terminator)\n",
+ data[i] & 0xffff);
+ return i;
+ } else {
+ instr_out(data, hw_offset, i,
+ " indices: 0x%04x, 0x%04x\n",
+ data[i] & 0xffff, data[i] >> 16);
+ }
+ }
+ fprintf(out,
+ "3DPRIMITIVE: no terminator found in index buffer\n");
+ (*failures)++;
+ return count;
+ } else {
+ /* fixed size vertex index buffer */
+ for (i = 0; i < len; i += 2) {
+ if (i * 2 == len - 1) {
+ instr_out(data, hw_offset, i,
+ " indices: 0x%04x\n",
+ data[i] & 0xffff);
+ } else {
+ instr_out(data, hw_offset, i,
+ " indices: 0x%04x, 0x%04x\n",
+ data[i] & 0xffff, data[i] >> 16);
+ }
+ }
+ }
+ return (len + 1) / 2 + 1;
+ } else {
+ /* sequential vertex access */
+ if (count < 2)
+ BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect");
+ instr_out(data, hw_offset, 0,
+ "3DPRIMITIVE sequential indirect %s, %d starting from "
+ "%d\n", primtype, len, data[1] & 0xffff);
+ instr_out(data, hw_offset, 1, " start\n");
+ return 2;
+ }
+ }
+
+ return len;
+}
+
+static int
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d[] = {
+ { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+ { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+ { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+ { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+ { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+ { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+ { 0x0d, 1, 1, "3DSTATE_MODES_4" },
+ { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+ { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+ };
+
+ switch ((data[0] & 0x1f000000) >> 24) {
+ case 0x1f:
+ return decode_3d_primitive(data, count, hw_offset, failures);
+ case 0x1d:
+ return decode_3d_1d(data, count, hw_offset, failures, 0);
+ case 0x1c:
+ return decode_3d_1c(data, count, hw_offset, failures);
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+ opcode++) {
+ if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ unsigned int len = 1, i;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+ if (opcodes_3d[opcode].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ if (len < opcodes_3d[opcode].min_len ||
+ len > opcodes_3d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+ switch (surfacetype) {
+ case 0: return "1D";
+ case 1: return "2D";
+ case 2: return "3D";
+ case 3: return "CUBE";
+ case 4: return "BUFFER";
+ case 7: return "NULL";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+ switch (depthformat) {
+ case 0: return "s8_z24float";
+ case 1: return "z32float";
+ case 2: return "z24s8";
+ case 5: return "z16";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+ uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+ switch (component_control) {
+ case 0:
+ return "nostore";
+ case 1:
+ switch (component) {
+ case 0: return "X";
+ case 1: return "Y";
+ case 2: return "Z";
+ case 3: return "W";
+ default: return "fail";
+ }
+ case 2:
+ return "0.0";
+ case 3:
+ return "1.0";
+ case 4:
+ return "0x1";
+ case 5:
+ return "VID";
+ default:
+ return "fail";
+ }
+}
+
+static const char *
+get_965_prim_type(uint32_t data)
+{
+ uint32_t primtype = (data >> 10) & 0x1f;
+
+ switch (primtype) {
+ case 0x01: return "point list";
+ case 0x02: return "line list";
+ case 0x03: return "line strip";
+ case 0x04: return "tri list";
+ case 0x05: return "tri strip";
+ case 0x06: return "tri fan";
+ case 0x07: return "quad list";
+ case 0x08: return "quad strip";
+ case 0x09: return "line list adj";
+ case 0x0a: return "line strip adj";
+ case 0x0b: return "tri list adj";
+ case 0x0c: return "tri strip adj";
+ case 0x0d: return "tri strip reverse";
+ case 0x0e: return "polygon";
+ case 0x0f: return "rect list";
+ case 0x10: return "line loop";
+ case 0x11: return "point list bf";
+ case 0x12: return "line strip cont";
+ case 0x13: return "line strip bf";
+ case 0x14: return "line strip cont bf";
+ case 0x15: return "tri fan no stipple";
+ default: return "fail";
+ }
+}
+
+static int
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode, len;
+ int i;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d[] = {
+ { 0x6000, 3, 3, "URB_FENCE" },
+ { 0x6001, 2, 2, "CS_URB_STATE" },
+ { 0x6002, 2, 2, "CONSTANT_BUFFER" },
+ { 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+ { 0x6102, 2, 2 , "STATE_SIP" },
+ { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+ { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+ { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+ { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+ { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+ { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+ { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+ { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+ { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+ { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+ { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x7b00, 6, 6, "3DPRIMITIVE" },
+ };
+
+ len = (data[0] & 0x0000ffff) + 2;
+
+ switch ((data[0] & 0xffff0000) >> 16) {
+ case 0x6101:
+ if (len != 6)
+ fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
+ if (count < 6)
+ BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+
+ instr_out(data, hw_offset, 0,
+ "STATE_BASE_ADDRESS\n");
+
+ if (data[1] & 1) {
+ instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
+ data[1] & ~1);
+ } else
+ instr_out(data, hw_offset, 1, "General state not updated\n");
+
+ if (data[2] & 1) {
+ instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
+ data[2] & ~1);
+ } else
+ instr_out(data, hw_offset, 2, "Surface state not updated\n");
+
+ if (data[3] & 1) {
+ instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
+ data[3] & ~1);
+ } else
+ instr_out(data, hw_offset, 3, "Indirect state not updated\n");
+
+ if (data[4] & 1) {
+ instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
+ data[4] & ~1);
+ } else
+ instr_out(data, hw_offset, 4, "General state not updated\n");
+
+ if (data[5] & 1) {
+ instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
+ data[5] & ~1);
+ } else
+ instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+
+ return len;
+ case 0x7800:
+ if (len != 7)
+ fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n");
+ if (count < 7)
+ BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS");
+
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_PIPELINED_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS state\n");
+ instr_out(data, hw_offset, 2, "GS state\n");
+ instr_out(data, hw_offset, 3, "Clip state\n");
+ instr_out(data, hw_offset, 4, "SF state\n");
+ instr_out(data, hw_offset, 5, "WM state\n");
+ instr_out(data, hw_offset, 6, "CC state\n");
+ return len;
+ case 0x7801:
+ if (len != 6)
+ fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
+ if (count < 6)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "Clip binding table\n");
+ instr_out(data, hw_offset, 4, "SF binding table\n");
+ instr_out(data, hw_offset, 5, "WM binding table\n");
+
+ return len;
+
+ case 0x7808:
+ len = (data[0] & 0xff) + 2;
+ if ((len - 1) % 4 != 0)
+ fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+ for (i = 1; i < len;) {
+ instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "random" : "sequential",
+ data[i] & 0x07ff);
+ i++;
+ instr_out(data, hw_offset, i++, "buffer address\n");
+ instr_out(data, hw_offset, i++, "max index\n");
+ instr_out(data, hw_offset, i++, "mbz\n");
+ }
+ return len;
+
+ case 0x7809:
+ len = (data[0] & 0xff) + 2;
+ if ((len + 1) % 2 != 0)
+ fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+ for (i = 1; i < len;) {
+ instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04x bytes\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
+ i++;
+ instr_out(data, hw_offset, i, "(%s, %s, %s, %s), "
+ "dst offset 0x%02x bytes\n",
+ get_965_element_component(data[i], 0),
+ get_965_element_component(data[i], 1),
+ get_965_element_component(data[i], 2),
+ get_965_element_component(data[i], 3),
+ (data[i] & 0xff) * 4);
+ i++;
+ }
+ return len;
+
+ case 0x780a:
+ len = (data[0] & 0xff) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER");
+ instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n");
+ instr_out(data, hw_offset, 1, "beginning buffer address\n");
+ instr_out(data, hw_offset, 2, "ending buffer address\n");
+ return len;
+
+ case 0x7900:
+ if (len != 4)
+ fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+ if (count < 4)
+ BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+ data[1] & 0xffff,
+ (data[1] >> 16) & 0xffff);
+ instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+ data[2] & 0xffff,
+ (data[2] >> 16) & 0xffff);
+ instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+ (int)data[3] & 0xffff,
+ ((int)data[3] >> 16) & 0xffff);
+
+ return len;
+
+ case 0x7905:
+ if (len != 5 && len != 6)
+ fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
+
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_DEPTH_BUFFER\n");
+ instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n",
+ get_965_surfacetype(data[1] >> 29),
+ get_965_depthformat((data[1] >> 18) & 0x7),
+ (data[1] & 0x0001ffff) + 1,
+ data[1] & (1 << 27) ? "" : "not ");
+ instr_out(data, hw_offset, 2, "depth offset\n");
+ instr_out(data, hw_offset, 3, "%dx%d\n",
+ ((data[3] & 0x0007ffc0) >> 6) + 1,
+ ((data[3] & 0xfff80000) >> 19) + 1);
+ instr_out(data, hw_offset, 4, "volume depth\n");
+ if (len == 6)
+ instr_out(data, hw_offset, 5, "\n");
+
+ return len;
+
+ case 0x7b00:
+ len = (data[0] & 0xff) + 2;
+ if (len != 6)
+ fprintf(out, "Bad count in 3DPRIMITIVE\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DPRIMITIVE");
+
+ instr_out(data, hw_offset, 0,
+ "3DPRIMITIVE: %s %s\n",
+ get_965_prim_type(data[0]),
+ (data[0] & (1 << 15)) ? "random" : "sequential");
+ instr_out(data, hw_offset, 1, "vertex count\n");
+ instr_out(data, hw_offset, 2, "start vertex\n");
+ instr_out(data, hw_offset, 3, "instance count\n");
+ instr_out(data, hw_offset, 4, "start instance\n");
+ instr_out(data, hw_offset, 5, "index bias\n");
+ return len;
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+ opcode++) {
+ if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+ unsigned int i;
+ len = 1;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+ if (opcodes_3d[opcode].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ if (len < opcodes_3d[opcode].min_len ||
+ len > opcodes_3d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+static int
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+ unsigned int opcode;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d[] = {
+ { 0x02, 1, 1, "3DSTATE_MODES_3" },
+ { 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+ { 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+ { 0x05, 1, 1, "3DSTATE_VFT0"},
+ { 0x06, 1, 1, "3DSTATE_AA"},
+ { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+ { 0x08, 1, 1, "3DSTATE_MODES_1" },
+ { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+ { 0x0a, 1, 1, "3DSTATE_VFT1"},
+ { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+ { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+ { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+ { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+ { 0x0f, 1, 1, "3DSTATE_MODES_2" },
+ { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+ { 0x16, 1, 1, "3DSTATE_MODES_4" },
+ };
+
+ switch ((data[0] & 0x1f000000) >> 24) {
+ case 0x1f:
+ return decode_3d_primitive(data, count, hw_offset, failures);
+ case 0x1d:
+ return decode_3d_1d(data, count, hw_offset, failures, 1);
+ case 0x1c:
+ return decode_3d_1c(data, count, hw_offset, failures);
+ }
+
+ for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+ opcode++) {
+ if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ unsigned int len = 1, i;
+
+ instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+ if (opcodes_3d[opcode].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ if (len < opcodes_3d[opcode].min_len ||
+ len > opcodes_3d[opcode].max_len)
+ {
+ fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ }
+ }
+
+ for (i = 1; i < len; i++) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ instr_out(data, hw_offset, i, "dword %d\n", i);
+ }
+ return len;
+ }
+ }
+
+ instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ (*failures)++;
+ return 1;
+}
+
+/**
+ * Decodes an i830-i915 batch buffer, writing the output to stdout.
+ *
+ * \param data batch buffer contents
+ * \param count number of DWORDs to decode in the batch buffer
+ * \param hw_offset hardware address for the buffer
+ */
+int
+intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+{
+ int index = 0;
+ int failures = 0;
+
+ out = stderr;
+
+ while (index < count) {
+ switch ((data[index] & 0xe0000000) >> 29) {
+ case 0x0:
+ index += decode_mi(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ break;
+ case 0x2:
+ index += decode_2d(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ break;
+ case 0x3:
+ if (IS_965(devid)) {
+ index += decode_3d_965(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ } else if (IS_9XX(devid)) {
+ index += decode_3d(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ } else {
+ index += decode_3d_i830(data + index, count - index,
+ hw_offset + index * 4, &failures);
+ }
+ break;
+ default:
+ instr_out(data, hw_offset, index, "UNKNOWN\n");
+ failures++;
+ index++;
+ break;
+ }
+ fflush(out);
+ }
+
+ return failures;
+}
+
+void intel_decode_context_reset(void)
+{
+ saved_s2_set = 0;
+ saved_s4_set = 1;
+}
+
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
new file mode 100644
index 00000000000..7683097b869
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+void intel_decode_context_reset(void);
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
new file mode 100644
index 00000000000..522e3bd92c2
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {
+ GLuint length:8;
+ GLuint pad0:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:8;
+ GLuint write_rgb:1;
+ GLuint write_alpha:1;
+ GLuint opcode:7;
+ GLuint client:3;
+};
+
+
+struct br13 {
+ GLint dest_pitch:16;
+ GLuint rop:8;
+ GLuint color_depth:2;
+ GLuint pad1:3;
+ GLuint mono_source_transparency:1;
+ GLuint clipping_enable:1;
+ GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+struct xy_color_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint color;
+};
+
+struct xy_src_copy_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+
+ struct {
+ GLuint src_x1:16;
+ GLuint src_y1:16;
+ } dw5;
+
+ struct {
+ GLint src_pitch:16;
+ GLuint pad:16;
+ } dw6;
+
+ GLuint src_base_addr;
+};
+
+struct xy_setup_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint clip_x1:16;
+ GLuint clip_y1:16;
+ } dw2;
+
+ struct {
+ GLuint clip_x2:16;
+ GLuint clip_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint background_color;
+ GLuint foreground_color;
+ GLuint pattern_base_addr;
+};
+
+
+struct xy_text_immediate_blit {
+ struct {
+ GLuint length:8;
+ GLuint pad2:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:4;
+ GLuint byte_packed:1;
+ GLuint pad0:5;
+ GLuint opcode:7;
+ GLuint client:3;
+ } dw0;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw1;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw2;
+
+ /* Src bitmap data follows as inline dwords.
+ */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 4e700089e33..9f5b4e63236 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -46,17 +46,6 @@ identity_destroy(struct pipe_context *_pipe)
}
static void
-identity_set_edgeflags(struct pipe_context *_pipe,
- const unsigned *bitfield)
-{
- struct identity_context *id_pipe = identity_context(_pipe);
- struct pipe_context *pipe = id_pipe->pipe;
-
- pipe->set_edgeflags(pipe,
- bitfield);
-}
-
-static boolean
identity_draw_arrays(struct pipe_context *_pipe,
unsigned prim,
unsigned start,
@@ -65,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe,
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
- return pipe->draw_arrays(pipe,
- prim,
- start,
- count);
+ pipe->draw_arrays(pipe,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -84,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_elements(pipe,
- indexBuffer,
- indexSize,
- prim,
- start,
- count);
+ pipe->draw_elements(pipe,
+ indexBuffer,
+ indexSize,
+ prim,
+ start,
+ count);
}
-static boolean
+static void
identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -107,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe,
struct pipe_context *pipe = id_pipe->pipe;
struct pipe_buffer *indexBuffer = id_buffer->buffer;
- return pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize,
- minIndex,
- maxIndex,
- mode,
- start,
- count);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize,
+ minIndex,
+ maxIndex,
+ mode,
+ start,
+ count);
}
static struct pipe_query *
@@ -221,16 +210,29 @@ identity_create_sampler_state(struct pipe_context *_pipe,
}
static void
-identity_bind_sampler_states(struct pipe_context *_pipe,
- unsigned num,
- void **samplers)
+identity_bind_fragment_sampler_states(struct pipe_context *_pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct identity_context *id_pipe = identity_context(_pipe);
+ struct pipe_context *pipe = id_pipe->pipe;
+
+ pipe->bind_fragment_sampler_states(pipe,
+ num_samplers,
+ samplers);
+}
+
+static void
+identity_bind_vertex_sampler_states(struct pipe_context *_pipe,
+ unsigned num_samplers,
+ void **samplers)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
- pipe->bind_sampler_states(pipe,
- num,
- samplers);
+ pipe->bind_vertex_sampler_states(pipe,
+ num_samplers,
+ samplers);
}
static void
@@ -480,9 +482,9 @@ identity_set_viewport_state(struct pipe_context *_pipe,
}
static void
-identity_set_sampler_textures(struct pipe_context *_pipe,
- unsigned num_textures,
- struct pipe_texture **_textures)
+identity_set_fragment_sampler_textures(struct pipe_context *_pipe,
+ unsigned num_textures,
+ struct pipe_texture **_textures)
{
struct identity_context *id_pipe = identity_context(_pipe);
struct pipe_context *pipe = id_pipe->pipe;
@@ -499,9 +501,34 @@ identity_set_sampler_textures(struct pipe_context *_pipe,
textures = unwrapped_textures;
}
- pipe->set_sampler_textures(pipe,
- num_textures,
- textures);
+ pipe->set_fragment_sampler_textures(pipe,
+ num_textures,
+ textures);
+}
+
+static void
+identity_set_vertex_sampler_textures(struct pipe_context *_pipe,
+ unsigned num_textures,
+ struct pipe_texture **_textures)
+{
+ struct identity_context *id_pipe = identity_context(_pipe);
+ struct pipe_context *pipe = id_pipe->pipe;
+ struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS];
+ struct pipe_texture **textures = NULL;
+ unsigned i;
+
+ if (_textures) {
+ for (i = 0; i < num_textures; i++)
+ unwrapped_textures[i] = identity_texture_unwrap(_textures[i]);
+ for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+ unwrapped_textures[i] = NULL;
+
+ textures = unwrapped_textures;
+ }
+
+ pipe->set_vertex_sampler_textures(pipe,
+ num_textures,
+ textures);
}
static void
@@ -669,7 +696,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.draw = NULL;
id_pipe->base.destroy = identity_destroy;
- id_pipe->base.set_edgeflags = identity_set_edgeflags;
id_pipe->base.draw_arrays = identity_draw_arrays;
id_pipe->base.draw_elements = identity_draw_elements;
id_pipe->base.draw_range_elements = identity_draw_range_elements;
@@ -682,7 +708,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.bind_blend_state = identity_bind_blend_state;
id_pipe->base.delete_blend_state = identity_delete_blend_state;
id_pipe->base.create_sampler_state = identity_create_sampler_state;
- id_pipe->base.bind_sampler_states = identity_bind_sampler_states;
+ id_pipe->base.bind_fragment_sampler_states = identity_bind_fragment_sampler_states;
+ id_pipe->base.bind_vertex_sampler_states = identity_bind_vertex_sampler_states;
id_pipe->base.delete_sampler_state = identity_delete_sampler_state;
id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state;
id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state;
@@ -703,7 +730,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple;
id_pipe->base.set_scissor_state = identity_set_scissor_state;
id_pipe->base.set_viewport_state = identity_set_viewport_state;
- id_pipe->base.set_sampler_textures = identity_set_sampler_textures;
+ id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
+ id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
id_pipe->base.surface_copy = identity_surface_copy;
diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index e893e599408..bc9bc7121d5 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer *id_transfer)
screen->tex_transfer_destroy(id_transfer->transfer);
FREE(id_transfer);
}
+
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+ struct pipe_video_surface *video_surface)
+{
+ struct identity_video_surface *id_video_surface;
+
+ if (!video_surface) {
+ goto error;
+ }
+
+ assert(video_surface->screen == id_screen->screen);
+
+ id_video_surface = CALLOC_STRUCT(identity_video_surface);
+ if (!id_video_surface) {
+ goto error;
+ }
+
+ memcpy(&id_video_surface->base,
+ video_surface,
+ sizeof(struct pipe_video_surface));
+
+ pipe_reference_init(&id_video_surface->base.reference, 1);
+ id_video_surface->base.screen = &id_screen->base;
+ id_video_surface->video_surface = video_surface;
+
+ return &id_video_surface->base;
+
+error:
+ pipe_video_surface_reference(&video_surface, NULL);
+ return NULL;
+}
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface)
+{
+ pipe_video_surface_reference(&id_video_surface->video_surface, NULL);
+ FREE(id_video_surface);
+}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index ce58faa3c7c..77cc7190798 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
#include "id_screen.h"
@@ -67,6 +68,14 @@ struct identity_transfer
};
+struct identity_video_surface
+{
+ struct pipe_video_surface base;
+
+ struct pipe_video_surface *video_surface;
+};
+
+
static INLINE struct identity_buffer *
identity_buffer(struct pipe_buffer *_buffer)
{
@@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer)
return (struct identity_transfer *)_transfer;
}
+static INLINE struct identity_video_surface *
+identity_video_surface(struct pipe_video_surface *_video_surface)
+{
+ if (!_video_surface) {
+ return NULL;
+ }
+ (void)identity_screen(_video_surface->screen);
+ return (struct identity_video_surface *)_video_surface;
+}
static INLINE struct pipe_buffer *
identity_buffer_unwrap(struct pipe_buffer *_buffer)
@@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture,
void
identity_transfer_destroy(struct identity_transfer *id_transfer);
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+ struct pipe_video_surface *video_surface);
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface);
+
#endif /* ID_OBJECTS_H */
diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h
index cac14cfd604..3d2862eaa01 100644
--- a/src/gallium/drivers/identity/id_public.h
+++ b/src/gallium/drivers/identity/id_public.h
@@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen);
struct pipe_context *
identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe);
-#endif /* PT_PUBLIC_H */
+#endif /* ID_PUBLIC_H */
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 26439637d08..53eae3ef544 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer)
identity_buffer_destroy(identity_buffer(_buffer));
}
+static struct pipe_video_surface *
+identity_screen_video_surface_create(struct pipe_screen *_screen,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width,
+ unsigned height)
+{
+ struct identity_screen *id_screen = identity_screen(_screen);
+ struct pipe_screen *screen = id_screen->screen;
+ struct pipe_video_surface *result;
+
+ result = screen->video_surface_create(screen,
+ chroma_format,
+ width,
+ height);
+
+ if (result) {
+ return identity_video_surface_create(id_screen, result);
+ }
+ return NULL;
+}
+
+static void
+identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc)
+{
+ identity_video_surface_destroy(identity_video_surface(_vsfc));
+}
+
static void
identity_screen_flush_frontbuffer(struct pipe_screen *_screen,
struct pipe_surface *_surface,
@@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen)
if (screen->buffer_unmap)
id_screen->base.buffer_unmap = identity_screen_buffer_unmap;
id_screen->base.buffer_destroy = identity_screen_buffer_destroy;
+ if (screen->video_surface_create) {
+ id_screen->base.video_surface_create = identity_screen_video_surface_create;
+ }
+ if (screen->video_surface_destroy) {
+ id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy;
+ }
id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer;
id_screen->base.fence_reference = identity_screen_fence_reference;
id_screen->base.fence_signalled = identity_screen_fence_signalled;
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6ec97046e15..264999a7cea 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -19,10 +19,13 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_query.c \
lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_pack.c \
+ lp_bld_sample.c \
lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
@@ -58,6 +61,9 @@ C_SOURCES = \
lp_texture.c \
lp_tile_soa.c
+CPP_SOURCES = \
+ lp_bld_misc.cpp
+
include ../../Makefile.template
lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 89d08834a3c..0c3f00fd58f 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,21 +51,22 @@ Requirements
- Linux
- - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes
- opcodes not yet supported by upstream.
+ - A x86 or amd64 processor. 64bit mode is preferred.
- git clone git://people.freedesktop.org/~jrfonseca/udis86
- cd udis86
- ./configure --with-pic
- make
- sudo make install
+ Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will
+ yield the most efficient code. The less features the CPU has the more
+ likely is that you ran into underperforming, buggy, or incomplete code.
+
+ See /proc/cpuinfo to know what your CPU supports.
+
+ - LLVM 2.5 or greater. LLVM 2.6 is preferred.
- - LLVM 2.5. On Debian based distributions do:
+ On Debian based distributions do:
aptitude install llvm-dev
- There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
- errors in the debug build:
+ There is a typo in one of the llvm 2.5 headers, that may cause compilation
+ errors. To fix it apply the change:
--- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100
+++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100
@@ -79,12 +80,17 @@ Requirements
#endif
return reinterpret_cast<T**>(Vals);
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
- instructions. This is necessary because we emit several SSE intrinsics for
- convenience. See /proc/cpuinfo to know what your CPU supports.
-
- - scons
+ - scons (optional)
+ - udis86, http://udis86.sourceforge.net/ (optional):
+
+ git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86
+ cd udis86
+ ./autogen.sh
+ ./configure --with-pic
+ make
+ sudo make install
+
Building
========
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index ae4303bd24f..5af77c4a12d 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
+env.Append(CPPPATH = ['.'])
+
env.CodeGenerate(
target = 'lp_tile_soa.c',
script = 'lp_tile_soa.py',
@@ -30,10 +32,14 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_query.c',
'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
'lp_bld_logic.c',
+ 'lp_bld_misc.cpp',
+ 'lp_bld_pack.c',
+ 'lp_bld_sample.c',
'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_swizzle.c',
@@ -75,21 +81,19 @@ llvmpipe = env.ConvenienceLibrary(
env = env.Clone()
-env.Prepend(LIBS = [llvmpipe] + auxiliaries)
+env.Prepend(LIBS = [llvmpipe] + gallium)
-env.Program(
- target = 'lp_test_format',
- source = ['lp_test_format.c'],
-)
+tests = [
+ 'format',
+ 'blend',
+ 'conv',
+]
-env.Program(
- target = 'lp_test_blend',
- source = ['lp_test_blend.c', 'lp_test_main.c'],
-)
-
-env.Program(
- target = 'lp_test_conv',
- source = ['lp_test_conv.c', 'lp_test_main.c'],
-)
+for test in tests:
+ target = env.Program(
+ target = 'lp_test_' + test,
+ source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ )
+ env.InstallProgram(target)
Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index f8260938f5a..2df86dd32e5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,6 +47,7 @@
#include "util/u_memory.h"
#include "util/u_debug.h"
+#include "util/u_math.h"
#include "util/u_string.h"
#include "util/u_cpu_detect.h"
@@ -54,6 +55,7 @@
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -280,45 +282,6 @@ lp_build_sub(struct lp_build_context *bld,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build constant int vector of width 'n' and value 'c'.
- */
-static LLVMValueRef
-lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(type, c, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
* Normalized 8bit multiplication.
*
* - alpha plus one
@@ -361,33 +324,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
*/
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
+ struct lp_type i16_type,
LLVMValueRef a, LLVMValueRef b)
{
- static LLVMValueRef c01 = NULL;
- static LLVMValueRef c08 = NULL;
- static LLVMValueRef c80 = NULL;
+ LLVMValueRef c8;
LLVMValueRef ab;
- if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
- if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
- if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
+ c8 = lp_build_int_const_scalar(i16_type, 8);
#if 0
/* a*b/255 ~= (a*(b + 1)) >> 256 */
- b = LLVMBuildAdd(builder, b, c01, "");
+ b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
ab = LLVMBuildMul(builder, a, b, "");
#else
- /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
+ /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
ab = LLVMBuildMul(builder, a, b, "");
- ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
- ab = LLVMBuildAdd(builder, ab, c80, "");
+ ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
+ ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
#endif
- ab = LLVMBuildLShr(builder, ab, c08, "");
+ ab = LLVMBuildLShr(builder, ab, c8, "");
return ab;
}
@@ -402,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b)
{
const struct lp_type type = bld->type;
+ LLVMValueRef shift;
+ LLVMValueRef res;
if(a == bld->zero)
return bld->zero;
@@ -415,39 +377,18 @@ lp_build_mul(struct lp_build_context *bld,
return bld->undef;
if(!type.floating && !type.fixed && type.norm) {
- if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) {
- LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
- LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
- static LLVMValueRef ml = NULL;
- static LLVMValueRef mh = NULL;
- LLVMValueRef al, ah, bl, bh;
- LLVMValueRef abl, abh;
- LLVMValueRef ab;
-
- if(!ml) ml = lp_build_unpack_shuffle(16, 0);
- if(!mh) mh = lp_build_unpack_shuffle(16, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
- bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
- ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
- bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
+ if(type.width == 8) {
+ struct lp_type i16_type = lp_wider_type(type);
+ LLVMValueRef al, ah, bl, bh, abl, abh, ab;
- /* NOP */
- al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
- bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
- ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
- bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
+ lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah);
+ lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh);
/* PMULLW, PSRLW, PADDW */
- abl = lp_build_mul_u8n(bld->builder, al, bl);
- abh = lp_build_mul_u8n(bld->builder, ah, bh);
+ abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl);
+ abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh);
- /* PACKUSWB */
- ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
-
- /* NOP */
- ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
+ ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh);
return ab;
}
@@ -456,10 +397,84 @@ lp_build_mul(struct lp_build_context *bld,
assert(0);
}
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstMul(a, b);
+ if(type.fixed)
+ shift = lp_build_int_const_scalar(type, type.width/2);
+ else
+ shift = NULL;
+
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ res = LLVMConstMul(a, b);
+ if(shift) {
+ if(type.sign)
+ res = LLVMConstAShr(res, shift);
+ else
+ res = LLVMConstLShr(res, shift);
+ }
+ }
+ else {
+ res = LLVMBuildMul(bld->builder, a, b, "");
+ if(shift) {
+ if(type.sign)
+ res = LLVMBuildAShr(bld->builder, res, shift, "");
+ else
+ res = LLVMBuildLShr(bld->builder, res, shift, "");
+ }
+ }
+
+ return res;
+}
+
+
+/**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b)
+{
+ LLVMValueRef factor;
+
+ if(b == 0)
+ return bld->zero;
- return LLVMBuildMul(bld->builder, a, b, "");
+ if(b == 1)
+ return a;
+
+ if(b == -1)
+ return LLVMBuildNeg(bld->builder, a, "");
+
+ if(b == 2 && bld->type.floating)
+ return lp_build_add(bld, a, a);
+
+ if(util_is_pot(b)) {
+ unsigned shift = ffs(b) - 1;
+
+ if(bld->type.floating) {
+#if 0
+ /*
+ * Power of two multiplication by directly manipulating the mantissa.
+ *
+ * XXX: This might not be always faster, it will introduce a small error
+ * for multiplication by zero, and it will produce wrong results
+ * for Inf and NaN.
+ */
+ unsigned mantissa = lp_mantissa(bld->type);
+ factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+ a = LLVMBuildAdd(bld->builder, a, factor, "");
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+ return a;
+#endif
+ }
+ else {
+ factor = lp_build_const_scalar(bld->type, shift);
+ return LLVMBuildShl(bld->builder, a, factor, "");
+ }
+ }
+
+ factor = lp_build_const_scalar(bld->type, (double)b);
+ return lp_build_mul(bld, a, factor);
}
@@ -494,13 +509,36 @@ lp_build_div(struct lp_build_context *bld,
}
+/**
+ * Linear interpolation.
+ *
+ * This also works for integer values with a few caveats.
+ *
+ * @sa http://www.stereopsis.com/doubleblend.html
+ */
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
LLVMValueRef v1)
{
- return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+ LLVMValueRef delta;
+ LLVMValueRef res;
+
+ delta = lp_build_sub(bld, v1, v0);
+
+ res = lp_build_mul(bld, x, delta);
+
+ res = lp_build_add(bld, v0, res);
+
+ if(bld->type.fixed)
+ /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
+ * but it will be wrong for other uses. Basically we need a more
+ * powerful lp_type, capable of further distinguishing the values
+ * interpretation from the value storage. */
+ res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+
+ return res;
}
@@ -1046,7 +1084,7 @@ lp_build_log(struct lp_build_context *bld,
LLVMValueRef x)
{
/* log(2) */
- LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634);
+ LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
}
@@ -1058,7 +1096,7 @@ lp_build_log(struct lp_build_context *bld,
/**
* Generate polynomial.
- * Ex: x^2 * coeffs[0] + x * coeffs[1] + coeffs[2].
+ * Ex: coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
*/
static LLVMValueRef
lp_build_polynomial(struct lp_build_context *bld,
@@ -1248,13 +1286,13 @@ lp_build_log2_approx(struct lp_build_context *bld,
/* mant = (float) mantissa(x) */
mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
mant = LLVMBuildOr(bld->builder, mant, one, "");
- mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+ mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
Elements(lp_build_log2_polynomial));
/* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 095a8e1cabe..62be4b9aee1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
@@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b);
+
+LLVMValueRef
lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index d14f468ba93..ced7b9c11d7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
enum lp_build_blend_swizzle {
LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
- LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1
};
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index ffb302f7366..cb8e1c7b006 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,7 +42,7 @@
#include <pipe/p_compiler.h>
-struct lp_type type;
+struct lp_type;
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index 1df938529c4..9fa88372022 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -69,6 +69,7 @@
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
#include "lp_bld_conv.h"
@@ -203,241 +204,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- /* TODO: cache results in a static table */
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build shuffle vectors that match PACKxx instructions.
- */
-static LLVMValueRef
-lp_build_const_pack_shuffle(unsigned n)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- /* TODO: cache results in a static table */
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Expand the bit width.
- *
- * This will only change the number of bits the values are represented, not the
- * values themselves.
- */
-static void
-lp_build_expand(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- LLVMValueRef src,
- LLVMValueRef *dst, unsigned num_dsts)
-{
- unsigned num_tmps;
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length == dst_type.length * num_dsts);
-
- num_tmps = 1;
- dst[0] = src;
-
- while(src_type.width < dst_type.width) {
- struct lp_type new_type = src_type;
- LLVMTypeRef new_vec_type;
-
- new_type.width *= 2;
- new_type.length /= 2;
- new_vec_type = lp_build_vec_type(new_type);
-
- for(i = num_tmps; i--; ) {
- LLVMValueRef zero;
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
- LLVMValueRef lo;
- LLVMValueRef hi;
-
- zero = lp_build_zero(src_type);
- shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0);
- shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, "");
- hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, "");
-
- dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, "");
- dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, "");
- }
-
- src_type = new_type;
-
- num_tmps *= 2;
- }
-
- assert(num_tmps == num_dsts);
-}
-
-
-/**
- * Non-interleaved pack.
- *
- * This will move values as
- *
- * lo = __ l0 __ l1 __ l2 __.. __ ln
- * hi = __ h0 __ h1 __ h2 __.. __ hn
- * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
- *
- * TODO: handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack2(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- LLVMValueRef lo,
- LLVMValueRef hi)
-{
- LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
- LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
- LLVMValueRef shuffle;
- LLVMValueRef res;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * 2 == dst_type.length);
-
- assert(!src_type.floating);
- assert(!dst_type.floating);
-
- if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
- /* All X86 non-interleaved pack instructions all take signed inputs and
- * saturate them, so saturate beforehand. */
- if(!src_type.sign && !clamped) {
- struct lp_build_context bld;
- unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
- LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
- lp_build_context_init(&bld, builder, src_type);
- lo = lp_build_min(&bld, lo, dst_max);
- hi = lp_build_min(&bld, hi, dst_max);
- }
-
- switch(src_type.width) {
- case 32:
- if(dst_type.sign || !util_cpu_caps.has_sse4_1)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
- else
- /* PACKUSDW is the only instrinsic with a consistent signature */
- return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
- break;
-
- case 16:
- if(dst_type.sign)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
- else
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
- break;
-
- default:
- assert(0);
- return LLVMGetUndef(dst_vec_type);
- break;
- }
-
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
- }
-
- lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
- hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
-
- shuffle = lp_build_const_pack_shuffle(dst_type.length);
-
- res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
-
- return res;
-}
-
-
-/**
- * Truncate the bit width.
- *
- * TODO: Handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
-{
- LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * num_srcs == dst_type.length);
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = src[i];
-
- while(src_type.width > dst_type.width) {
- struct lp_type new_type = src_type;
-
- new_type.width /= 2;
- new_type.length *= 2;
-
- /* Take in consideration the sign changes only in the last step */
- if(new_type.width == dst_type.width)
- new_type.sign = dst_type.sign;
-
- num_srcs /= 2;
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped,
- tmp[2*i + 0], tmp[2*i + 1]);
-
- src_type = new_type;
- }
-
- assert(num_srcs == 1);
-
- return tmp[0];
-}
-
-
-/**
* Generic type conversion.
*
* TODO: Take a precision argument, or even better, add a new precision member
@@ -576,7 +342,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width < dst_type.width) {
assert(num_tmps == 1);
- lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
+ lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = num_dsts;
@@ -696,7 +462,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
- lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts);
+ lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts);
}
else {
assert(num_srcs == num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index ca378804d2a..948e68fae4f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 59d8f492e60..39dfc51e503 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -77,10 +77,10 @@ lp_disassemble(const void* func)
while (ud_disassemble(&ud_obj)) {
#ifdef PIPE_ARCH_X86
- debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj));
+ debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
#endif
#ifdef PIPE_ARCH_X86_64
- debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj));
+ debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
#endif
#if 0
@@ -115,9 +115,16 @@ lp_disassemble(const void* func)
}
}
- if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret)
+ if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
+ ud_obj.mnemonic == UD_Iinvalid)
break;
}
+
+#if 0
+ /* Print GDB command, useful to verify udis86 output */
+ debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc);
+#endif
+
debug_printf("\n");
#else
(void)func;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 98ec1cb1b9d..d438c0e63d7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
type.floating = TRUE;
- assert(swizzle = 0);
+ assert(swizzle == 0);
assert(format_desc->channel[swizzle].size == format_desc->block.bits);
}
else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index c087fc986ed..970bee379f5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -42,62 +42,34 @@ struct util_format_description;
struct lp_type;
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc);
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-LLVMValueRef
-lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled);
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed);
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed);
+
LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef rgba);
void
@@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef *rgba);
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba);
-
#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index b9b5d84bed5..10e82f120bb 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -25,18 +25,39 @@
*
**************************************************************************/
+/**
+ * @file
+ * AoS pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
#include "lp_bld_format.h"
+/**
+ * Unpack a single pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef packed)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
@@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i;
- desc = util_format_description(format);
-
/* FIXME: Support more formats */
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
@@ -111,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
- // UIToFP can't be expressed in SSE2
+ /* UIToFP can't be expressed in SSE2 */
casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
if (normalized)
@@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
}
+/**
+ * Take a vector with packed pixels and unpack into a rgba8 vector.
+ *
+ * Formats with bit depth smaller than 32bits are accepted, but they must be
+ * padded to 32bits.
+ */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed)
+{
+ struct lp_build_context bld;
+ bool rgba8;
+ LLVMValueRef res;
+ unsigned i;
+
+ lp_build_context_init(&bld, builder, type);
+
+ /* FIXME: Support more formats */
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->block.width == 1);
+ assert(desc->block.height == 1);
+ assert(desc->block.bits <= 32);
+
+ assert(!type.floating);
+ assert(!type.fixed);
+ assert(type.norm);
+ assert(type.width == 8);
+ assert(type.length % 4 == 0);
+
+ rgba8 = TRUE;
+ for(i = 0; i < 4; ++i) {
+ assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
+ if(desc->channel[0].size != 8)
+ rgba8 = FALSE;
+ }
+
+ if(rgba8) {
+ /*
+ * The pixel is already in a rgba8 format variant. All it is necessary
+ * is to swizzle the channels.
+ */
+
+ unsigned char swizzles[4];
+ boolean zeros[4]; /* bitwise AND mask */
+ boolean ones[4]; /* bitwise OR mask */
+ boolean swizzles_needed = FALSE;
+ boolean zeros_needed = FALSE;
+ boolean ones_needed = FALSE;
+
+ for(i = 0; i < 4; ++i) {
+ enum util_format_swizzle swizzle = desc->swizzle[i];
+
+ /* Initialize with the no-op case */
+ swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
+ zeros[i] = TRUE;
+ ones[i] = FALSE;
+
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ if(swizzle != swizzles[i]) {
+ swizzles[i] = swizzle;
+ swizzles_needed = TRUE;
+ }
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ zeros[i] = FALSE;
+ zeros_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ ones[i] = TRUE;
+ ones_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ assert(0);
+ break;
+ }
+ }
+
+ res = packed;
+
+ if(swizzles_needed)
+ res = lp_build_swizzle1_aos(&bld, res, swizzles);
+
+ if(zeros_needed) {
+ /* Mask out zero channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
+ res = LLVMBuildAnd(builder, res, mask, "");
+ }
+
+ if(ones_needed) {
+ /* Or one channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
+ res = LLVMBuildOr(builder, res, mask, "");
+ }
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ res = lp_build_undef(type);
+ }
+
+ return res;
+}
+
+
+/**
+ * Pack a single pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef rgba)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef packed = NULL;
LLVMValueRef swizzles[4];
@@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i, j;
- desc = util_format_description(format);
-
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
@@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
return packed;
}
-
-
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- /* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
- assert(desc->block.bits <= 32);
-
- type = LLVMIntType(desc->block.bits);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- packed = LLVMBuildLoad(builder, ptr, "");
-
- return lp_build_unpack_rgba_aos(builder, format, packed);
-}
-
-
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
-
- type = LLVMIntType(desc->block.bits);
-
- packed = lp_build_pack_rgba_aos(builder, format, rgba);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
new file mode 100644
index 00000000000..f3832d07ff9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Utility functions to make assertions about formats.
+ *
+ * This module centralizes most of logic used when determining what algorithm
+ * is most suitable (i.e., most efficient yet correct) for a given format.
+ *
+ * It might be possible to move some of these functions to u_format module,
+ * but since tiny differences in the format my render it more/less
+ * appropriate to a given algorithm it is impossible to make any long term
+ * guarantee about the semantics of these functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Whether this format is a 4 rgba8 variant
+ */
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc)
+{
+ unsigned chan;
+
+ if(desc->block.width != 1 ||
+ desc->block.height != 1 ||
+ desc->block.bits != 32)
+ return FALSE;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+ return FALSE;
+ if(desc->channel[chan].size != 8)
+ return FALSE;
+ }
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 66bebdcdec8..64151d169da 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -34,66 +34,17 @@
#include "lp_bld_format.h"
-/**
- * Gather elements from scatter positions in memory into a single vector.
- *
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
-
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
-
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
-
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
- }
-
- return res;
-}
-
-
static LLVMValueRef
-lp_build_format_swizzle(struct lp_type type,
- const LLVMValueRef *inputs,
- enum util_format_swizzle swizzle)
+lp_build_format_swizzle_chan_soa(struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ enum util_format_swizzle swizzle)
{
switch (swizzle) {
case UTIL_FORMAT_SWIZZLE_X:
case UTIL_FORMAT_SWIZZLE_Y:
case UTIL_FORMAT_SWIZZLE_Z:
case UTIL_FORMAT_SWIZZLE_W:
- return inputs[swizzle];
+ return unswizzled[swizzle];
case UTIL_FORMAT_SWIZZLE_0:
return lp_build_zero(type);
case UTIL_FORMAT_SWIZZLE_1:
@@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type,
void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled)
+{
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ swizzled[2] = swizzled[1] = swizzled[0] = depth;
+ swizzled[3] = lp_build_one(type);
+ }
+ else {
+ unsigned chan;
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ }
+ }
+}
+
+
+void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
struct lp_type type,
@@ -172,38 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
start = stop;
}
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- enum util_format_swizzle swizzle = format_desc->swizzle[0];
- LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
- rgba[2] = rgba[1] = rgba[0] = depth;
- rgba[3] = lp_build_one(type);
- }
- else {
- for (chan = 0; chan < 4; ++chan) {
- enum util_format_swizzle swizzle = format_desc->swizzle[chan];
- rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
- }
- }
-}
-
-
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba)
-{
- LLVMValueRef packed;
-
- assert(format_desc->block.width == 1);
- assert(format_desc->block.height == 1);
- assert(format_desc->block.bits <= type.width);
-
- packed = lp_build_gather(builder,
- type.length, format_desc->block.bits, type.width,
- base_ptr, offsets);
-
- lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+ lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index affeeca6ff9..daedf40d558 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -143,32 +143,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
/**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
- LLVMValueRef coeff,
- int step)
-{
- LLVMValueRef factor;
-
- switch(step) {
- case 0:
- return bld->base.zero;
- case 1:
- return coeff;
- case 2:
- return lp_build_add(&bld->base, coeff, coeff);
- default:
- factor = lp_build_const_scalar(bld->base.type, (double)step);
- return lp_build_mul(&bld->base, coeff, factor);
- }
-}
-
-
-/**
* Emit LLVM code to compute the fragment shader input attribute values.
* For example, for a color input, we'll compute red, green, blue and alpha
* values for the four pixels in a quad.
@@ -378,8 +352,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
unsigned first, last, mask;
unsigned attrib;
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
+ first = decl->Range.First;
+ last = decl->Range.Last;
mask = decl->Declaration.UsageMask;
for( attrib = first; attrib <= last; ++attrib ) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index d6876366561..40d64eb2c19 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
new file mode 100644
index 00000000000..d3f78c06d92
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+ extern void LinkInJIT();
+}
+
+
+void
+LLVMLinkInJIT(void)
+{
+ llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+int
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ X86TargetMachineModule = 1;
+#endif
+ return 0;
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
new file mode 100644
index 00000000000..0e787e0b9cb
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_MISC_H
+#define LP_BLD_MISC_H
+
+
+#include "llvm/Config/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+void
+LLVMLinkInJIT(void);
+
+int
+LLVMInitializeNativeTarget(void);
+
+#endif /* !LLVM_NATIVE_ARCH */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* !LP_BLD_MISC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
new file mode 100644
index 00000000000..bc360ad77ad
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -0,0 +1,418 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for packing/unpacking.
+ *
+ * Pack/unpacking is necessary for conversion between types of different
+ * bit width.
+ *
+ * They are also commonly used when an computation needs higher
+ * precision for the intermediate values. For example, if one needs the
+ * function:
+ *
+ * c = compute(a, b);
+ *
+ * to use more precision for intermediate results then one should implement it
+ * as:
+ *
+ * LLVMValueRef
+ * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b)
+ * {
+ * struct lp_type wide_type = lp_wider_type(type);
+ * LLVMValueRef al, ah, bl, bh, cl, ch, c;
+ *
+ * lp_build_unpack2(builder, type, wide_type, a, &al, &ah);
+ * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh);
+ *
+ * cl = compute_half(al, bl);
+ * ch = compute_half(ah, bh);
+ *
+ * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch);
+ *
+ * return c;
+ * }
+ *
+ * where compute_half() would do the computation for half the elements with
+ * twice the precision.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
+
+
+/**
+ * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i, j;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+ assert(lo_hi < 2);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
+ elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
+ elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
+ }
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Build shuffle vectors that match PACKxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_pack_shuffle(unsigned n)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0; i < n; ++i)
+ elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Interleave vector elements.
+ *
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ */
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle;
+
+ shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi);
+
+ return LLVMBuildShuffleVector(builder, a, b, shuffle, "");
+}
+
+
+/**
+ * Double the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi)
+{
+ LLVMValueRef msb;
+ LLVMTypeRef dst_vec_type;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(dst_type.width == src_type.width * 2);
+ assert(dst_type.length * 2 == src_type.length);
+
+ if(dst_type.sign && src_type.sign) {
+ /* Replicate the sign bit in the most significant bits */
+ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+ }
+ else
+ /* Most significant bits always zero */
+ msb = lp_build_zero(src_type);
+
+ /* Interleave bits */
+ if(util_cpu_caps.little_endian) {
+ *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
+ }
+ else {
+ *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
+ }
+
+ /* Cast the result into the new type (twice as wide) */
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+ *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Expand the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ unsigned num_tmps;
+ unsigned i;
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length == dst_type.length * num_dsts);
+
+ num_tmps = 1;
+ dst[0] = src;
+
+ while(src_type.width < dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width *= 2;
+ tmp_type.length /= 2;
+
+ for(i = num_tmps; i--; ) {
+ lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]);
+ }
+
+ src_type = tmp_type;
+
+ num_tmps *= 2;
+ }
+
+ assert(num_tmps == num_dsts);
+}
+
+
+/**
+ * Non-interleaved pack.
+ *
+ * This will move values as
+ *
+ * lo = __ l0 __ l1 __ l2 __.. __ ln
+ * hi = __ h0 __ h1 __ h2 __.. __ hn
+ * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ *
+ * It is assumed the values are already clamped into the destination type range.
+ * Values outside that range will produce undefined results. Use
+ * lp_build_packs2 instead.
+ */
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+ LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
+ LLVMValueRef shuffle;
+ LLVMValueRef res;
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
+ switch(src_type.width) {
+ case 32:
+ if(dst_type.sign) {
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+ }
+ else {
+ if (util_cpu_caps.has_sse4_1) {
+ /* PACKUSDW is the only instrinsic with a consistent signature */
+ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
+ }
+ else {
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ }
+ }
+ break;
+
+ case 16:
+ if(dst_type.sign)
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+ else
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+ break;
+
+ default:
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ break;
+ }
+
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
+
+ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
+ hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
+
+ shuffle = lp_build_const_pack_shuffle(dst_type.length);
+
+ res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
+
+ return res;
+}
+
+
+
+/**
+ * Non-interleaved pack and saturate.
+ *
+ * Same as lp_build_pack2 but will saturate values so that they fit into the
+ * destination type.
+ */
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ boolean clamp;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.sign == dst_type.sign);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ clamp = TRUE;
+
+ /* All X86 SSE non-interleaved pack instructions take signed inputs and
+ * saturate them, so no need to clamp for those cases. */
+ if(util_cpu_caps.has_sse2 &&
+ src_type.width * src_type.length == 128 &&
+ src_type.sign)
+ clamp = FALSE;
+
+ if(clamp) {
+ struct lp_build_context bld;
+ unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
+ LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+ lp_build_context_init(&bld, builder, src_type);
+ lo = lp_build_min(&bld, lo, dst_max);
+ hi = lp_build_min(&bld, hi, dst_max);
+ /* FIXME: What about lower bound? */
+ }
+
+ return lp_build_pack2(builder, src_type, dst_type, lo, hi);
+}
+
+
+/**
+ * Truncate the bit width.
+ *
+ * TODO: Handle saturation consistently.
+ */
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
+{
+ LLVMValueRef (*pack2)(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length);
+
+ if(clamped)
+ pack2 = &lp_build_pack2;
+ else
+ pack2 = &lp_build_packs2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = src[i];
+
+ while(src_type.width > dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width /= 2;
+ tmp_type.length *= 2;
+
+ /* Take in consideration the sign changes only in the last step */
+ if(tmp_type.width == dst_type.width)
+ tmp_type.sign = dst_type.sign;
+
+ num_srcs /= 2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]);
+
+ src_type = tmp_type;
+ }
+
+ assert(num_srcs == 1);
+
+ return tmp[0];
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
new file mode 100644
index 00000000000..fb2a34984a4
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for packing/unpacking conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_PACK_H
+#define LP_BLD_PACK_H
+
+
+#include <llvm-c/Core.h>
+
+
+struct lp_type;
+
+
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi);
+
+
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi);
+
+
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs);
+
+
+#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
new file mode 100644
index 00000000000..9003e108c1c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- common code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width0);
+ state->pot_height = util_is_pot(texture->height0);
+ state->pot_depth = util_is_pot(texture->depth0);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ state->compare_mode = sampler->compare_mode;
+ if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+ x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(bld, x, x_stride);
+ y_offset = lp_build_mul(bld, y, y_stride);
+
+ offset = lp_build_add(bld, x_offset, y_offset);
+ }
+
+ return offset;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 403d0e48367..8cb8210ca76 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,9 @@
struct pipe_texture;
struct pipe_sampler_state;
+struct util_format_description;
struct lp_type;
+struct lp_build_context;
/**
@@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr);
+
+
void
lp_build_sample_soa(LLVMBuilderRef builder,
const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 1a47ca32d2d..5ee8d556a68 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
/**
* @file
- * Texture sampling.
+ * Texture sampling -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
@@ -35,54 +35,23 @@
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
+#include "util/u_debug_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
- const struct pipe_texture *texture,
- const struct pipe_sampler_state *sampler)
-{
- memset(state, 0, sizeof *state);
-
- if(!texture)
- return;
-
- if(!sampler)
- return;
-
- state->format = texture->format;
- state->target = texture->target;
- state->pot_width = util_is_pot(texture->width[0]);
- state->pot_height = util_is_pot(texture->height[0]);
- state->pot_depth = util_is_pot(texture->depth[0]);
-
- state->wrap_s = sampler->wrap_s;
- state->wrap_t = sampler->wrap_t;
- state->wrap_r = sampler->wrap_r;
- state->min_img_filter = sampler->min_img_filter;
- state->min_mip_filter = sampler->min_mip_filter;
- state->mag_img_filter = sampler->mag_img_filter;
- if(sampler->compare_mode) {
- state->compare_mode = sampler->compare_mode;
- state->compare_func = sampler->compare_func;
- }
- state->normalized_coords = sampler->normalized_coords;
- state->prefilter = sampler->prefilter;
-}
-
-
-
/**
* Keep all information for sampling code generation in a single place.
*/
@@ -111,66 +80,61 @@ struct lp_build_sample_context
static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
{
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef x_stride;
LLVMValueRef offset;
+ LLVMValueRef packed;
+
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
+
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+ packed = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+
+ lp_build_unpack_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ packed, texel);
+}
- x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
- if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- LLVMValueRef x_lo, x_hi;
- LLVMValueRef y_lo, y_hi;
- LLVMValueRef x_stride_lo, x_stride_hi;
- LLVMValueRef y_stride_lo, y_stride_hi;
- LLVMValueRef x_offset_lo, x_offset_hi;
- LLVMValueRef y_offset_lo, y_offset_hi;
- LLVMValueRef offset_lo, offset_hi;
-
- x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
- y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
- x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
- y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
- x_stride_lo = x_stride;
- y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
- x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
- y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
- x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
- y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
- offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
- x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
- y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
- offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
- offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
- }
- else {
- LLVMValueRef x_offset;
- LLVMValueRef y_offset;
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef offset;
- x_offset = lp_build_mul(int_coord_bld, x, x_stride);
- y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
- offset = lp_build_add(int_coord_bld, x_offset, y_offset);
- }
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
- lp_build_load_rgba_soa(bld->builder,
- bld->format_desc,
- bld->texel_type,
- data_ptr,
- offset,
- texel);
+ return lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
}
@@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* FIXME */
- _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+ _debug_printf("warning: failed to translate texture wrap mode %s\n",
+ debug_dump_tex_wrap(wrap_mode, TRUE));
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
break;
@@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+ lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
}
@@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
- lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
- lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
- lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+ lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
/* TODO: Don't interpolate missing channels */
for(chan = 0; chan < 4; ++chan) {
@@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+ unsigned chan;
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+
+ if(stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+ LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2];
+ LLVMValueRef neighbors_lo[2][2];
+ LLVMValueRef neighbors_hi[2][2];
+ LLVMValueRef packed, packed_lo, packed_hi;
+ LLVMValueRef unswizzled[4];
+
+ lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+
+ i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ t = LLVMBuildAdd(builder, t, i32_c128, "");
+
+ i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+
+ i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+ s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+ t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+ x0 = s_ipart;
+ y0 = t_ipart;
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ /*
+ * Transform 4 x i32 in
+ *
+ * s_fpart = {s0, s1, s2, s3}
+ *
+ * into 8 x i16
+ *
+ * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+ *
+ * into two 8 x i16
+ *
+ * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+ * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+ *
+ * and likewise for t_fpart. There is no risk of loosing precision here
+ * since the fractional parts only use the lower 8bits.
+ */
+
+ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+ t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+ {
+ LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffle_lo;
+ LLVMValueRef shuffle_hi;
+ unsigned i, j;
+
+ for(j = 0; j < h16.type.length; j += 4) {
+ unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+ LLVMValueRef index;
+
+ index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_lo[j + i] = index;
+
+ index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_hi[j + i] = index;
+ }
+
+ shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+ shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+ s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+ t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+ s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+ t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+
+ neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+ neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+ neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+ neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+ neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+ neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+ neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+ neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+ /*
+ * Linear interpolate with 8.8 fixed point.
+ */
+
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0],
+ neighbors_lo[0][1],
+ neighbors_lo[1][0],
+ neighbors_lo[1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0],
+ neighbors_hi[0][1],
+ neighbors_hi[1][0],
+ neighbors_hi[1][1]);
+
+ packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+ /*
+ * Convert to SoA and swizzle.
+ */
+
+ packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+ lp_build_rgba8_to_f32_soa(bld->builder,
+ bld->texel_type,
+ packed, unswizzled);
+
+ lp_build_format_swizzle_soa(bld->format_desc,
+ bld->texel_type, unswizzled,
+ texel);
+}
+
+
+static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
LLVMValueRef *texel)
@@ -312,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef res;
unsigned chan;
- if(!bld->static_state->compare_mode)
+ if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
return;
/* TODO: Compare before swizzling, to avoid redundant computations */
@@ -401,8 +577,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
break;
case PIPE_TEX_FILTER_LINEAR:
- case PIPE_TEX_FILTER_ANISO:
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ if(lp_format_is_rgba8(bld.format_desc))
+ lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+ else
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index 1f6da804488..b9472127a63 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 83ac25bb200..fb1eda4423b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -64,7 +64,7 @@
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+ ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -157,19 +157,19 @@ emit_fetch(
unsigned index,
const unsigned chan_index )
{
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ const struct tgsi_full_src_register *reg = &inst->Src[index];
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
switch (swizzle) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
- switch (reg->SrcRegister.File) {
+ switch (reg->Register.File) {
case TGSI_FILE_CONSTANT: {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
res = lp_build_broadcast_scalar(&bld->base, scalar);
@@ -177,17 +177,17 @@ emit_fetch(
}
case TGSI_FILE_IMMEDIATE:
- res = bld->immediates[reg->SrcRegister.Index][swizzle];
+ res = bld->immediates[reg->Register.Index][swizzle];
assert(res);
break;
case TGSI_FILE_INPUT:
- res = bld->inputs[reg->SrcRegister.Index][swizzle];
+ res = bld->inputs[reg->Register.Index][swizzle];
assert(res);
break;
case TGSI_FILE_TEMPORARY:
- res = bld->temps[reg->SrcRegister.Index][swizzle];
+ res = bld->temps[reg->Register.Index][swizzle];
if(!res)
return bld->base.undef;
break;
@@ -198,14 +198,6 @@ emit_fetch(
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- res = bld->base.zero;
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- res = bld->base.one;
- break;
-
default:
assert( 0 );
return bld->base.undef;
@@ -275,7 +267,7 @@ emit_store(
unsigned chan_index,
LLVMValueRef value)
{
- const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
@@ -295,13 +287,13 @@ emit_store(
assert(0);
}
- switch( reg->DstRegister.File ) {
+ switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- bld->outputs[reg->DstRegister.Index][chan_index] = value;
+ bld->outputs[reg->Register.Index][chan_index] = value;
break;
case TGSI_FILE_TEMPORARY:
- bld->temps[reg->DstRegister.Index][chan_index] = value;
+ bld->temps[reg->Register.Index][chan_index] = value;
break;
case TGSI_FILE_ADDRESS:
@@ -327,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
boolean projected,
LLVMValueRef *texel)
{
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ const uint unit = inst->Src[1].Register.Index;
LLVMValueRef lodbias;
- LLVMValueRef oow;
+ LLVMValueRef oow = NULL;
LLVMValueRef coords[3];
unsigned num_coords;
unsigned i;
- switch (inst->InstructionExtTexture.Texture) {
+ switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
num_coords = 1;
break;
@@ -369,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
if (projected)
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
+ for (i = num_coords; i < 3; i++) {
+ coords[i] = bld->base.undef;
+ }
bld->sampler->emit_fetch_texel(bld->sampler,
bld->base.builder,
@@ -383,7 +378,7 @@ emit_kil(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst )
{
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
+ const struct tgsi_full_src_register *reg = &inst->Src[0];
LLVMValueRef terms[NUM_CHANNELS];
LLVMValueRef mask;
unsigned chan_index;
@@ -394,12 +389,7 @@ emit_kil(
unsigned swizzle;
/* Unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
- /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
- * not to be tested. */
- if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
- continue;
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
/* Check if the component has not been already tested. */
assert(swizzle < NUM_CHANNELS);
@@ -436,15 +426,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
{
uint i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
- if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
- reg->SrcRegister.Indirect)
+ const struct tgsi_full_src_register *reg = &inst->Src[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
return TRUE;
}
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
- if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
- reg->DstRegister.Indirect)
+ const struct tgsi_full_dst_register *reg = &inst->Dst[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
return TRUE;
}
return FALSE;
@@ -459,7 +449,12 @@ emit_instruction(
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
- LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ LLVMValueRef tmp0, tmp1, tmp2;
+ LLVMValueRef tmp3 = NULL;
+ LLVMValueRef tmp4 = NULL;
+ LLVMValueRef tmp5 = NULL;
+ LLVMValueRef tmp6 = NULL;
+ LLVMValueRef tmp7 = NULL;
LLVMValueRef res;
LLVMValueRef dst0[NUM_CHANNELS];
@@ -488,7 +483,6 @@ emit_instruction(
#endif
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
@@ -574,9 +568,9 @@ emit_instruction(
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
- LLVMValueRef *p_floor_log2;
- LLVMValueRef *p_exp;
- LLVMValueRef *p_log2;
+ LLVMValueRef *p_floor_log2 = NULL;
+ LLVMValueRef *p_exp = NULL;
+ LLVMValueRef *p_log2 = NULL;
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src0 = lp_build_abs( &bld->base, src0 );
@@ -1324,7 +1318,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_SHR:
+ case TGSI_OPCODE_ISHR:
/* deprecated? */
assert(0);
return 0;
@@ -1386,15 +1380,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_NOISE1:
- case TGSI_OPCODE_NOISE2:
- case TGSI_OPCODE_NOISE3:
- case TGSI_OPCODE_NOISE4:
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.zero;
- }
- break;
-
case TGSI_OPCODE_NOP:
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index e8cf7256c0e..8270cd057f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -161,7 +161,7 @@ lp_build_int_vec_type(struct lp_type type)
* Build int32[4] vector type
*/
LLVMTypeRef
-lp_build_int32_vec4_type()
+lp_build_int32_vec4_type(void)
{
struct lp_type t;
LLVMTypeRef type;
@@ -181,12 +181,31 @@ lp_build_int32_vec4_type()
struct lp_type
lp_int_type(struct lp_type type)
{
- struct lp_type int_type;
+ struct lp_type res_type;
- memset(&int_type, 0, sizeof int_type);
- int_type.width = type.width;
- int_type.length = type.length;
- return int_type;
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = type.width;
+ res_type.length = type.length;
+
+ return res_type;
+}
+
+
+/**
+ * Return the type with twice the bit width (hence half the number of elements).
+ */
+struct lp_type
+lp_wider_type(struct lp_type type)
+{
+ struct lp_type res_type;
+
+ memcpy(&res_type, &type, sizeof res_type);
+ res_type.width *= 2;
+ res_type.length /= 2;
+
+ assert(res_type.length);
+
+ return res_type;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 118fb339089..b7d8aed396d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -43,13 +43,18 @@
/**
+ * Native SIMD register width.
+ *
+ * 128 for all architectures we care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
+/**
* Several functions can only cope with vectors of length up to this value.
* You may need to increase that value if you want to represent bigger vectors.
*/
#define LP_MAX_VECTOR_LENGTH 16
-#define LP_MAX_TYPE_WIDTH 64
-
/**
* The LLVM type system can't conveniently express all the things we care about
@@ -134,6 +139,91 @@ struct lp_build_context
};
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.norm = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
LLVMTypeRef
lp_build_elem_type(struct lp_type type);
@@ -170,6 +260,10 @@ struct lp_type
lp_int_type(struct lp_type type);
+struct lp_type
+lp_wider_type(struct lp_type type);
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 696a9d5f6a8..8d965175f8c 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -31,6 +31,7 @@
*/
#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -68,6 +69,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
pipe_texture_reference(&llvmpipe->texture[i], NULL);
}
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL);
+ }
+
for (i = 0; i < Elements(llvmpipe->constants); i++) {
if (llvmpipe->constants[i].buffer) {
pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL);
@@ -117,7 +122,8 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
- llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states;
llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
@@ -142,7 +148,8 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
- llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures;
+ llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
+ llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures;
llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
@@ -151,8 +158,6 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
- llvmpipe->pipe.set_edgeflags = llvmpipe_set_edgeflags;
-
llvmpipe->pipe.clear = llvmpipe_clear;
llvmpipe->pipe.flush = llvmpipe_flush;
@@ -170,8 +175,7 @@ llvmpipe_create( struct pipe_screen *screen )
if (!llvmpipe->draw)
goto fail;
- /* FIXME: vertex sampler state
- */
+ /* FIXME: devise alternative to draw_texture_samplers */
if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
llvmpipe->no_rast = TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 194692045dc..1ede6a6a72f 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -53,6 +53,7 @@ struct llvmpipe_context {
/** Constant state objects */
const struct pipe_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
struct lp_fragment_shader *fs;
@@ -66,12 +67,15 @@ struct llvmpipe_context {
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned num_samplers;
unsigned num_textures;
+ unsigned num_vertex_samplers;
+ unsigned num_vertex_textures;
unsigned num_vertex_elements;
unsigned num_vertex_buffers;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 91fcbc01c6d..3989cce7445 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,11 +45,11 @@
-boolean
+void
llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
}
@@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -99,7 +99,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_arrays(draw, mode, start, count);
/*
- * unmap vertex/index buffers - will cause draw module to flush
+ * unmap vertex/index buffers
*/
for (i = 0; i < lp->num_vertex_buffers; i++) {
draw_set_mapped_vertex_buffer(draw, i, NULL);
@@ -108,26 +108,24 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- return TRUE;
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
}
-boolean
+void
llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return llvmpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
+ llvmpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
-
-void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct llvmpipe_context *lp = llvmpipe_context(pipe);
- draw_set_edgeflags(lp->draw, edgeflags);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
index 14fbea6d993..97c46087da0 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.c
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -64,7 +64,7 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
struct lp_fence *old = (struct lp_fence *) *ptr;
struct lp_fence *f = (struct lp_fence *) fence;
- if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) {
+ if (pipe_reference(&old->reference, &f->reference)) {
lp_fence_destroy(old);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index fb6ec9bb37a..4ef0783f3e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -39,6 +39,7 @@
#include "util/u_cpu_detect.h"
#include "lp_screen.h"
#include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
#include "lp_jit.h"
@@ -78,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
/* struct lp_jit_context */
{
- LLVMTypeRef elem_types[5];
+ LLVMTypeRef elem_types[4];
LLVMTypeRef context_type;
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
- elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
- elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
- elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
- elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+ elem_types[1] = LLVMFloatType(); /* alpha_ref_value */
+ elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
+ elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
screen->target, context_type, 0);
- LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, dummy,
- screen->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
- screen->target, context_type, 2);
+ screen->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
- screen->target, context_type, 3);
+ screen->target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
screen->target, context_type,
LP_JIT_CONTEXT_TEXTURES_INDEX);
@@ -135,13 +133,12 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
#if 0
/* For simulating less capable machines */
util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
util_cpu_caps.has_sse4_1 = 0;
#endif
-#ifdef LLVM_NATIVE_ARCH
LLVMLinkInJIT();
LLVMInitializeNativeTarget();
-#endif
screen->module = LLVMModuleCreateWithName("llvmpipe");
@@ -150,7 +147,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
_debug_printf("%s\n", error);
LLVMDisposeMessage(error);
- abort();
+ assert(0);
}
screen->target = LLVMGetExecutionEngineTargetData(screen->engine);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index e8fb7d990f8..1a6e939aa24 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -41,7 +41,6 @@
#include "pipe/p_state.h"
-struct tgsi_sampler;
struct llvmpipe_screen;
@@ -78,8 +77,6 @@ struct lp_jit_context
{
const float *constants;
- void *dummy; /* remove me */
-
float alpha_ref_value;
/* FIXME: store (also?) in floats */
@@ -92,16 +89,13 @@ struct lp_jit_context
#define lp_jit_context_constants(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 0, "constants")
-#define lp_jit_context_samplers(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 1, "samplers")
-
#define lp_jit_context_alpha_ref_value(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value")
+ lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value")
#define lp_jit_context_blend_color(_builder, _ptr) \
- lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+ lp_build_struct_get(_builder, _ptr, 2, "blend_color")
-#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 3
#define lp_jit_context_textures(_builder, _ptr) \
lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
@@ -124,7 +118,6 @@ typedef void
const int32_t *step3);
-
void
lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 6772ff332ba..6535e693089 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -230,7 +230,7 @@ void lp_rast_load_color( struct lp_rasterizer *rast,
assert(w <= TILE_SIZE);
assert(h <= TILE_SIZE);
- lp_tile_read_4ub(rast->cbuf_transfer->format,
+ lp_tile_read_4ub(rast->cbuf_transfer->texture->format,
rast->tasks[thread_index].tile.color,
rast->cbuf_map,
rast->cbuf_transfer->stride,
@@ -394,7 +394,7 @@ static void lp_rast_store_color( struct lp_rasterizer *rast,
LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__,
thread_index, x, y, w, h);
- lp_tile_write_4ub(rast->cbuf_transfer->format,
+ lp_tile_write_4ub(rast->cbuf_transfer->texture->format,
rast->tasks[thread_index].tile.color,
rast->cbuf_map,
rast->cbuf_transfer->stride,
@@ -437,7 +437,7 @@ static void lp_rast_store_zstencil( struct lp_rasterizer *rast,
LP_DBG(DEBUG_RAST, "%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
- assert(rast->zsbuf_transfer->format == PIPE_FORMAT_Z32_UNORM);
+ assert(rast->zsbuf_transfer->texture->format == PIPE_FORMAT_Z32_UNORM);
lp_tile_write_z32(rast->tasks[thread_index].tile.depth,
rast->zsbuf_map,
rast->zsbuf_transfer->stride,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 017496ea5fc..a28f6935b68 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,6 +27,7 @@
#include "util/u_memory.h"
+#include "util/u_format.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -77,7 +78,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return PIPE_MAX_SAMPLERS;
+ return PIPE_MAX_VERTEX_SAMPLERS;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -150,17 +153,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_winsys *winsys = screen->winsys;
+ const struct util_format_description *format_desc;
+
+ format_desc = util_format_description(format);
+ if(!format_desc)
+ return FALSE;
assert(target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_2D ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
- if(format == PIPE_FORMAT_Z16_UNORM)
- return FALSE;
- if(format == PIPE_FORMAT_S8_UNORM)
- return FALSE;
-
switch(format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
@@ -171,8 +174,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
break;
}
- if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
- return winsys->is_displaytarget_format_supported(winsys, format);
+ if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ if(format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+
+ if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+ return FALSE;
+
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
+ return FALSE;
+ }
+
+ if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+ if(!winsys->is_displaytarget_format_supported(winsys, format))
+ return FALSE;
+ }
+
+ if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ /* FIXME: Temporary restriction. See lp_state_fs.c. */
+ if(format_desc->block.bits != 32)
+ return FALSE;
+ }
+
+ /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */
+ if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+ if(format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+
+ if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+ return FALSE;
+
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+ }
return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 1eb944a0de7..5cdcf4ecc98 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -451,8 +451,8 @@ lp_setup_set_sampler_textures( struct setup_context *setup,
struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
struct lp_jit_texture *jit_tex;
jit_tex = &setup->fs.current.jit_context.textures[i];
- jit_tex->width = tex->width[0];
- jit_tex->height = tex->height[0];
+ jit_tex->width = tex->width0;
+ jit_tex->height = tex->height0;
jit_tex->stride = lp_tex->stride[0];
if(!lp_tex->dt)
jit_tex->data = lp_tex->data;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index e15b987767c..fe34903cf32 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -241,9 +241,6 @@ static inline int subpixel_snap( float a )
}
-#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
-#define MAX3(a,b,c) MAX2(MAX2(a,b),c)
-
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 6017dc553a6..25d13536741 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -57,7 +57,6 @@
#define LP_NEW_BLEND_COLOR 0x8000
-struct tgsi_sampler;
struct vertex_info;
struct pipe_context;
struct llvmpipe_context;
@@ -127,6 +126,10 @@ void *
llvmpipe_create_sampler_state(struct pipe_context *,
const struct pipe_sampler_state *);
void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *,
+ unsigned num_samplers,
+ void **samplers);
void llvmpipe_delete_sampler_state(struct pipe_context *, void *);
void *
@@ -173,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *,
unsigned num,
struct pipe_texture ** );
+void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
+ unsigned num_textures,
+ struct pipe_texture **);
+
void llvmpipe_set_viewport_state( struct pipe_context *,
const struct pipe_viewport_state * );
@@ -189,14 +197,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp);
void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe );
-boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count);
-boolean llvmpipe_draw_elements(struct pipe_context *pipe,
+void llvmpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count);
-boolean
+void
llvmpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -205,10 +213,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
-void
llvmpipe_map_texture_surfaces(struct llvmpipe_context *lp);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index 48afe5f5242..a10c5918df3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -34,6 +34,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_debug_dump.h"
+#include "draw/draw_context.h"
#include "lp_screen.h"
#include "lp_context.h"
#include "lp_state.h"
@@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->blend == blend)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
llvmpipe->blend = blend;
llvmpipe->dirty |= LP_NEW_BLEND;
@@ -74,6 +80,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
return;
+ if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
llvmpipe->dirty |= LP_NEW_BLEND_COLOR;
@@ -98,7 +109,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+ if (llvmpipe->depth_stencil == depth_stencil)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->depth_stencil = depth_stencil;
llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index cc7b09fd4d1..78d046985b9 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -50,7 +50,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
{
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
struct vertex_info *vinfo = &llvmpipe->vertex_info;
- const uint num = draw_num_vs_outputs(llvmpipe->draw);
+ const uint num = draw_num_shader_outputs(llvmpipe->draw);
uint i;
/* Tell setup to tell the draw module to simply emit the whole
@@ -123,9 +123,9 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
/* Search for each input in current vs output:
*/
inputs[i].src_index =
- draw_find_vs_output(llvmpipe->draw,
- lpfs->info.input_semantic_name[i],
- lpfs->info.input_semantic_index[i]);
+ draw_find_shader_output(llvmpipe->draw,
+ lpfs->info.input_semantic_name[i],
+ lpfs->info.input_semantic_index[i]);
}
lp_setup_set_fs_inputs(llvmpipe->setup,
@@ -170,26 +170,6 @@ compute_cliprect(struct llvmpipe_context *lp)
}
-#if 0
-static void
-update_culling(struct llvmpipe_context *lp)
-{
- struct lp_setup_context *setup = lp->setup;
-
- if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
- lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
- lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
- /* we'll do culling */
- setup->winding = lp->rasterizer->cull_mode;
- }
- else {
- /* 'draw' will do culling */
- setup->winding = PIPE_WINDING_NONE;
- }
-}
-#endif
-
-
/* Hopefully this will remain quite simple, otherwise need to pull in
* something like the state tracker mechanism.
*/
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 3ad58415e39..3a669ba859a 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -149,6 +149,20 @@ generate_depth(LLVMBuilderRef builder,
format_desc = util_format_description(key->zsbuf_format);
assert(format_desc);
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(src_type.floating) {
+ src_type.sign = FALSE;
+ src_type.norm = TRUE;
+ }
+ else {
+ assert(!src_type.sign);
+ assert(src_type.norm);
+ }
+
/* Pick the depth type. */
dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
@@ -156,14 +170,11 @@ generate_depth(LLVMBuilderRef builder,
assert(dst_type.width == src_type.width);
assert(dst_type.length == src_type.length);
-#if 1
- src = lp_build_clamped_float_to_unsigned_norm(builder,
- src_type,
- dst_type.width,
- src);
-#else
lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-#endif
+
+ dst_ptr = LLVMBuildBitCast(builder,
+ dst_ptr,
+ LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
lp_build_depth_test(builder,
&key->depth,
@@ -505,6 +516,7 @@ generate_fragment(struct llvmpipe_context *lp,
if (LP_DEBUG & DEBUG_JIT) {
tgsi_dump(shader->base.tokens, 0);
if(key->depth.enabled) {
+ debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
}
@@ -517,13 +529,40 @@ generate_fragment(struct llvmpipe_context *lp,
}
else if(key->blend.blend_enable) {
debug_printf("blend.rgb_func = %s\n", debug_dump_blend_func (key->blend.rgb_func, TRUE));
- debug_printf("blend.rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
- debug_printf("blend.rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
- debug_printf("blend.alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
- debug_printf("blend.alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
- debug_printf("blend.alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+ debug_printf("rgb_src_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+ debug_printf("rgb_dst_factor = %s\n", debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+ debug_printf("alpha_func = %s\n", debug_dump_blend_func (key->blend.alpha_func, TRUE));
+ debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+ debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if(key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ pf_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ debug_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ }
+ }
}
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -702,10 +741,12 @@ generate_fragment(struct llvmpipe_context *lp,
* Translate the LLVM IR into machine code.
*/
+#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
LLVMDumpValue(variant->function);
- abort();
+ assert(0);
}
+#endif
LLVMRunFunctionPassManager(screen->pass, variant->function);
@@ -751,7 +792,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- llvmpipe->fs = (struct lp_fragment_shader *) fs;
+ if (llvmpipe->fs == fs)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->fs = fs;
llvmpipe->dirty |= LP_NEW_FS;
}
@@ -766,6 +812,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
struct lp_fragment_shader_variant *variant;
assert(fs != llvmpipe->fs);
+ (void) llvmpipe;
variant = shader->variants;
while(variant) {
@@ -804,14 +851,14 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
if(llvmpipe->constants[shader].buffer == buffer)
return;
- if(shader == PIPE_SHADER_VERTEX)
- draw_flush(llvmpipe->draw);
+ draw_flush(llvmpipe->draw);
/* note: reference counting */
pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
if(shader == PIPE_SHADER_VERTEX) {
- draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+ draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
+ data, size);
}
llvmpipe->dirty |= LP_NEW_CONSTANTS;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index 282ed2e9ea3..7d4c310aae8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -42,14 +42,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ if (llvmpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(llvmpipe->draw, setup);
+ draw_set_rasterizer_state(llvmpipe->draw, rasterizer);
- llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ llvmpipe->rasterizer = rasterizer;
/* Note: we can immediately set the triangle state here and
* not worry about binning because we handle culling during
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index e19394a4c92..976f81113fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -77,6 +77,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i;
+
+ assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_samplers == llvmpipe->num_vertex_samplers &&
+ !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < num_samplers; ++i)
+ llvmpipe->vertex_samplers[i] = samplers[i];
+ for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ llvmpipe->vertex_samplers[i] = NULL;
+
+ llvmpipe->num_vertex_samplers = num_samplers;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
+void
llvmpipe_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
@@ -105,6 +133,36 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ uint i;
+
+ assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_textures == llvmpipe->num_vertex_textures &&
+ !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+ return;
+ }
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_texture *tex = i < num_textures ? textures[i] : NULL;
+
+ pipe_texture_reference(&llvmpipe->vertex_textures[i], tex);
+ }
+
+ llvmpipe->num_vertex_textures = num_textures;
+
+ llvmpipe->dirty |= LP_NEW_TEXTURE;
+}
+
+
+void
llvmpipe_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 957e947fe02..0e9f03b90b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -37,6 +37,8 @@
#include "draw/draw_context.h"
+#include "util/u_format.h"
+
/**
* Set the framebuffer surface info: color buffers, zbuffer, stencil buffer.
@@ -57,8 +59,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
if (lp->framebuffer.zsbuf) {
int depth_bits;
double mrd;
- depth_bits = pf_get_component_bits(lp->framebuffer.zsbuf->format,
- PIPE_FORMAT_COMP_Z);
+ depth_bits = util_format_get_component_bits(lp->framebuffer.zsbuf->format,
+ UTIL_FORMAT_COLORSPACE_ZS,
+ 0);
if (depth_bits > 16) {
mrd = 0.0000001;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 15c30296144..884e3878e62 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -70,14 +70,18 @@ fail:
void
-llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs;
- llvmpipe->vs = (const struct lp_vertex_shader *)vs;
+ if (llvmpipe->vs == vs)
+ return;
- draw_bind_vertex_shader(llvmpipe->draw,
- (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
+ draw_bind_vertex_shader(llvmpipe->draw,
+ vs ? vs->draw_data : NULL);
+
+ llvmpipe->vs = vs;
llvmpipe->dirty |= LP_NEW_VS;
}
@@ -92,5 +96,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
(struct lp_vertex_shader *)vs;
draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+ FREE( (void *)state->shader.tokens );
FREE( state );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index a88e110c663..39d80726e65 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -56,6 +56,9 @@
#include "lp_bld_type.h"
+#define LP_TEST_NUM_SAMPLES 32
+
+
void
write_tsv_header(FILE *fp);
@@ -68,17 +71,28 @@ boolean
test_all(unsigned verbose, FILE *fp);
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
static INLINE uint64_t
rdtsc(void)
{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
uint32_t hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
#else
- return 0;
+
+#define rdtsc() 0
+
#endif
-}
+
float
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 94b661dcba4..29fff91981a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
}
+ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -477,8 +478,8 @@ test_one(unsigned verbose,
char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned i, j;
@@ -530,11 +531,11 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -595,11 +596,11 @@ test_one(unsigned verbose,
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9dcf58e5dcd..faddfb96779 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module,
}
+ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -156,8 +157,8 @@ test_one(unsigned verbose,
char *error = NULL;
conv_test_ptr_t conv_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned num_srcs;
unsigned num_dsts;
@@ -229,8 +230,8 @@ test_one(unsigned verbose,
for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8;
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0;
@@ -329,7 +330,7 @@ test_one(unsigned verbose,
fprintf(stderr, "conv.bc written\n");
fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
firsttime = FALSE;
- //abort();
+ /* abort(); */
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index ab80c0143f9..23ea9ebbe7d 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -35,9 +35,11 @@
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/Scalar.h>
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
#include "lp_bld_format.h"
+#include "lp_test.h"
struct pixel_test_case
@@ -88,34 +90,62 @@ struct pixel_test_case test_cases[] =
};
-typedef void (*load_ptr_t)(const void *, float *);
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ const struct util_format_description *desc,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ fprintf(fp, "%s\n", desc->name);
+
+ fflush(fp);
+}
+
+
+typedef void (*load_ptr_t)(const uint32_t packed, float *);
static LLVMValueRef
add_load_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMInt32Type();
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_load_rgba_aos(builder, format, ptr);
+ if(desc->block.bits < 32)
+ packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), "");
+
+ rgba = lp_build_unpack_rgba_aos(builder, desc, packed);
+
LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
@@ -125,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module,
}
-typedef void (*store_ptr_t)(void *, const float *);
+typedef void (*store_ptr_t)(uint32_t *, const float *);
static LLVMValueRef
add_store_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed_ptr;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
+ LLVMValueRef packed;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMPointerType(LLVMInt32Type(), 0);
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed_ptr = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
@@ -154,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba_aos(builder, format, ptr, rgba);
+ packed = lp_build_pack_rgba_aos(builder, desc, rgba);
+
+ if(desc->block.bits < 32)
+ packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+ LLVMBuildStore(builder, packed, packed_ptr);
LLVMBuildRetVoid(builder);
@@ -163,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module,
}
+ALIGN_STACK
static boolean
-test_format(const struct pixel_test_case *test)
+test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
{
LLVMModuleRef module = NULL;
LLVMValueRef load = NULL;
@@ -186,8 +223,8 @@ test_format(const struct pixel_test_case *test)
module = LLVMModuleCreateWithName("test");
- load = add_load_rgba_test(module, test->format);
- store = add_store_rgba_test(module, test->format);
+ load = add_load_rgba_test(module, desc);
+ store = add_store_rgba_test(module, desc);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -223,7 +260,7 @@ test_format(const struct pixel_test_case *test)
memset(unpacked, 0, sizeof unpacked);
packed = 0;
- load_ptr(&test->packed, unpacked);
+ load_ptr(test->packed, unpacked);
store_ptr(&packed, unpacked);
success = TRUE;
@@ -249,23 +286,29 @@ test_format(const struct pixel_test_case *test)
if(pass)
LLVMDisposePassManager(pass);
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
return success;
}
-int main(int argc, char **argv)
+boolean
+test_all(unsigned verbose, FILE *fp)
{
unsigned i;
- int ret;
-
-#ifdef LLVM_NATIVE_ARCH
- LLVMLinkInJIT();
- LLVMInitializeNativeTarget();
-#endif
+ bool success = TRUE;
for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
- if(!test_format(&test_cases[i]))
- ret = 1;
+ if(!test_format(verbose, fp, &test_cases[i]))
+ success = FALSE;
- return ret;
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index f07fa256f16..314544aa9a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -34,10 +34,25 @@
*/
+#include "util/u_cpu_detect.h"
+
#include "lp_bld_const.h"
+#include "lp_bld_misc.h"
#include "lp_test.h"
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+ if (x >= 0.0)
+ return floor(x + 0.5);
+ else
+ return ceil(x - 0.5);
+}
+#endif
+
+
void
dump_type(FILE *fp,
struct lp_type type)
@@ -365,10 +380,10 @@ int main(int argc, char **argv)
n = atoi(argv[i]);
}
-#ifdef LLVM_NATIVE_ARCH
LLVMLinkInJIT();
LLVMInitializeNativeTarget();
-#endif
+
+ util_cpu_detect();
if(fp) {
/* Warm up the caches */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index dfc9c0e6f04..cb59a94464a 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -31,32 +31,10 @@
#include <llvm-c/Core.h>
-#include "tgsi/tgsi_exec.h"
-
struct lp_sampler_static_state;
-
-extern void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
-
-
-/**
- * Texture sampling code generator that just calls lp_get_samples C function
- * for the actual sampling computation.
- *
- * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
- */
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr);
-
-
/**
* Pure-LLVM texture sampling code generator.
*
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 1682e37354a..2462378152a 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -34,6 +34,8 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
#include "pipe/internal/p_winsys_screen.h"
+
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -47,7 +49,6 @@
/* Simple, maximally packed layout.
*/
-
/* Conventional allocation path for non-display textures:
*/
static boolean
@@ -56,31 +57,31 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
{
struct pipe_texture *pt = &lpt->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
unsigned buffer_size = 0;
- pf_get_block(lpt->base.format, &lpt->base.block);
-
for (level = 0; level <= pt->last_level; level++) {
- pt->width[level] = width;
- pt->height[level] = height;
- pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
- lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16);
+ unsigned nblocksx, nblocksy;
+
+ /* Allocate storage for whole quads. This is particularly important
+ * for depth surfaces, which are currently stored in a swizzled format. */
+ nblocksx = util_format_get_nblocksx(pt->format, align(width, 2));
+ nblocksy = util_format_get_nblocksy(pt->format, align(height, 2));
+
+ lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16);
lpt->level_offset[level] = buffer_size;
- buffer_size += (pt->nblocksy[level] *
+ buffer_size += (nblocksy *
((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
lpt->stride[level]);
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
}
lpt->data = align_malloc(buffer_size, 16);
@@ -94,14 +95,10 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
{
struct llvmpipe_winsys *winsys = screen->winsys;
- pf_get_block(lpt->base.format, &lpt->base.block);
- lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);
- lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);
-
lpt->dt = winsys->displaytarget_create(winsys,
lpt->base.format,
- lpt->base.width[0],
- lpt->base.height[0],
+ lpt->base.width0,
+ lpt->base.height0,
16,
&lpt->stride[0] );
@@ -163,7 +160,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
/* Only supports one type */
if (base->target != PIPE_TEXTURE_2D ||
base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1) {
return NULL;
}
@@ -174,8 +171,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
lpt->base = *base;
pipe_reference_init(&lpt->base.reference, 1);
lpt->base.screen = screen;
- lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);
- lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);
lpt->stride[0] = stride[0];
pipe_buffer_reference(&lpt->buffer, buffer);
@@ -220,8 +215,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
pipe_reference_init(&ps->reference, 1);
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->offset = lpt->level_offset[level];
ps->usage = usage;
@@ -249,11 +244,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
ps->level = level;
ps->zslice = zslice;
+ /* XXX shouldn't that rather be
+ tex_height = align(ps->height, 2);
+ to account for alignment done in llvmpipe_texture_layout ?
+ */
if (pt->target == PIPE_TEXTURE_CUBE) {
- ps->offset += face * pt->nblocksy[level] * lpt->stride[level];
+ unsigned tex_height = ps->height;
+ ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else if (pt->target == PIPE_TEXTURE_3D) {
- ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level];
+ unsigned tex_height = ps->height;
+ ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else {
assert(face == 0);
@@ -294,14 +295,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
if (lpt) {
struct pipe_transfer *pt = &lpt->base;
pipe_texture_reference(&pt->texture, texture);
- pt->format = texture->format;
- pt->block = texture->block;
pt->x = x;
pt->y = y;
pt->width = w;
pt->height = h;
- pt->nblocksx = texture->nblocksx[level];
- pt->nblocksy = texture->nblocksy[level];
pt->stride = lptex->stride[level];
pt->usage = usage;
pt->face = face;
@@ -310,11 +307,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
lpt->offset = lptex->level_offset[level];
+ /* XXX shouldn't that rather be
+ tex_height = align(u_minify(texture->height0, level), 2)
+ to account for alignment done in llvmpipe_texture_layout ?
+ */
if (texture->target == PIPE_TEXTURE_CUBE) {
- lpt->offset += face * pt->nblocksy * pt->stride;
+ unsigned tex_height = u_minify(texture->height0, level);
+ lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else if (texture->target == PIPE_TEXTURE_3D) {
- lpt->offset += zslice * pt->nblocksy * pt->stride;
+ unsigned tex_height = u_minify(texture->height0, level);
+ lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else {
assert(face == 0);
@@ -346,9 +349,11 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
ubyte *map, *xfer_map;
struct llvmpipe_texture *lpt;
+ enum pipe_format format;
assert(transfer->texture);
lpt = llvmpipe_texture(transfer->texture);
+ format = lpt->base.format;
if(lpt->dt) {
struct llvmpipe_winsys *winsys = screen->winsys;
@@ -372,8 +377,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
}
xfer_map = map + llvmpipe_transfer(transfer)->offset +
- transfer->y / transfer->block.height * transfer->stride +
- transfer->x / transfer->block.width * transfer->block.size;
+ transfer->y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
/*printf("map = %p xfer map = %p\n", map, xfer_map);*/
return xfer_map;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 660cc30c820..1b7be3cce0d 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -29,7 +29,7 @@
#define LP_TILE_SOA_H
#include "pipe/p_compiler.h"
-#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS
+#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */
#ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h
index 595481c2cbc..74b472b6531 100644
--- a/src/gallium/drivers/llvmpipe/lp_winsys.h
+++ b/src/gallium/drivers/llvmpipe/lp_winsys.h
@@ -35,7 +35,7 @@
#define LP_WINSYS_H
-#include "pipe/p_compiler.h" // for boolean
+#include "pipe/p_compiler.h" /* for boolean */
#include "pipe/p_format.h"
diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile
index dbe8a6e7bf5..0cb66041d50 100644
--- a/src/gallium/drivers/nouveau/Makefile
+++ b/src/gallium/drivers/nouveau/Makefile
@@ -3,6 +3,7 @@ include $(TOP)/configs/current
LIBNAME = nouveau
-C_SOURCES = nouveau_screen.c
+C_SOURCES = nouveau_screen.c \
+ nouveau_context.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c
new file mode 100644
index 00000000000..23443869e68
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_context.c
@@ -0,0 +1,41 @@
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_context.h"
+
+#include "nouveau/nouveau_bo.h"
+
+static unsigned int
+nouveau_reference_flags(struct nouveau_bo *bo)
+{
+ uint32_t bo_flags;
+ int flags = 0;
+
+ bo_flags = nouveau_bo_pending(bo);
+ if (bo_flags & NOUVEAU_BO_RD)
+ flags |= PIPE_REFERENCED_FOR_READ;
+ if (bo_flags & NOUVEAU_BO_WR)
+ flags |= PIPE_REFERENCED_FOR_WRITE;
+
+ return flags;
+}
+
+unsigned int
+nouveau_is_texture_referenced(struct pipe_context *pipe,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level)
+{
+ struct nouveau_miptree *mt = nouveau_miptree(pt);
+
+ return nouveau_reference_flags(mt->bo);
+}
+
+unsigned int
+nouveau_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *pb)
+{
+ struct nouveau_bo *bo = nouveau_bo(pb);
+
+ return nouveau_reference_flags(bo);
+}
+
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
new file mode 100644
index 00000000000..6a28d40da7b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -0,0 +1,11 @@
+#ifndef __NOUVEAU_CONTEXT_H__
+#define __NOUVEAU_CONTEXT_H__
+
+unsigned int
+nouveau_is_texture_referenced(struct pipe_context *, struct pipe_texture *,
+ unsigned face, unsigned level);
+
+unsigned int
+nouveau_is_buffer_referenced(struct pipe_context *, struct pipe_buffer *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
deleted file mode 100644
index 9c235080a55..00000000000
--- a/src/gallium/drivers/nouveau/nouveau_push.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef __NOUVEAU_PUSH_H__
-#define __NOUVEAU_PUSH_H__
-
-#include "nouveau/nouveau_winsys.h"
-
-#ifndef NOUVEAU_PUSH_CONTEXT
-#error undefined push context
-#endif
-
-#define OUT_RING(data) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- (*pc->base.channel->pushbuf->cur++) = (data); \
-} while(0)
-
-#define OUT_RINGp(src,size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \
- pc->base.channel->pushbuf->cur += (size); \
-} while(0)
-
-#define OUT_RINGf(data) do { \
- union { float v; uint32_t u; } c; \
- c.v = (data); \
- OUT_RING(c.u); \
-} while(0)
-
-#define BEGIN_RING(obj,mthd,size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- if (chan->pushbuf->remaining < ((size) + 1)) \
- nouveau_pushbuf_flush(chan, ((size) + 1)); \
- OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \
- chan->pushbuf->remaining -= ((size) + 1); \
-} while(0)
-
-#define BEGIN_RING_NI(obj,mthd,size) do { \
- BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
-} while(0)
-
-static inline void
-DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence)
-{
- nouveau_pushbuf_flush(chan, 0);
- if (fence)
- *fence = NULL;
-}
-
-#define FIRE_RING(fence) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- DO_FIRE_RING(pc->base.channel, fence); \
-} while(0)
-
-#define OUT_RELOC(bo,data,flags,vor,tor) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \
- (data), 0, (flags), (vor), (tor)); \
-} while(0)
-
-/* Raw data + flags depending on FB/TT buffer */
-#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
- OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
-} while(0)
-
-/* FB/TT object handle */
-#define OUT_RELOCo(bo,flags) do { \
- OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
- pc->base.channel->vram->handle, \
- pc->base.channel->gart->handle); \
-} while(0)
-
-/* Low 32-bits of offset */
-#define OUT_RELOCl(bo,delta,flags) do { \
- OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
-} while(0)
-
-/* High 32-bits of offset */
-#define OUT_RELOCh(bo,delta,flags) do { \
- OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
-} while(0)
-
-/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
-#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
- NOUVEAU_PUSH_CONTEXT(pc); \
- struct nouveau_channel *chan = pc->base.channel; \
- if (chan->pushbuf->remaining < ((size) + 1)) \
- nouveau_pushbuf_flush(chan, ((size) + 1)); \
- OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
- (flags), 0, 0); \
- chan->pushbuf->remaining -= ((size) + 1); \
-} while(0)
-
-#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index e4cf91c005c..7ebc94ed6c7 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo,
unsigned alignment, unsigned usage, unsigned size)
{
struct pipe_buffer *pb;
-
+
pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *));
if (!pb) {
nouveau_bo_ref(NULL, &bo);
@@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ struct nouveau_screen *nscreen = nouveau_screen(pscreen);
int ret;
+ if (nscreen->pre_pipebuffer_map_callback) {
+ ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+ if (ret) {
+ debug_printf("pre_pipebuffer_map_callback failed %d\n",
+ ret);
+ return NULL;
+ }
+ }
+
ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage));
if (ret) {
debug_printf("map failed: %d\n", ret);
@@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb,
unsigned offset, unsigned length, unsigned usage)
{
struct nouveau_bo *bo = nouveau_bo(pb);
+ struct nouveau_screen *nscreen = nouveau_screen(pscreen);
uint32_t flags = nouveau_screen_map_flags(usage);
int ret;
+ if (nscreen->pre_pipebuffer_map_callback) {
+ ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+ if (ret) {
+ debug_printf("pre_pipebuffer_map_callback failed %d\n",
+ ret);
+ return NULL;
+ }
+ }
+
ret = nouveau_bo_map_range(bo, offset, length, flags);
if (ret) {
+ nouveau_bo_unmap(bo);
if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
debug_printf("map_range failed: %d\n", ret);
return NULL;
@@ -239,5 +260,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
+ nouveau_channel_free(&screen->channel);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index ebfc67ad1c1..a7927d88dfc 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -5,6 +5,9 @@ struct nouveau_screen {
struct pipe_screen base;
struct nouveau_device *device;
struct nouveau_channel *channel;
+
+ int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen,
+ struct pipe_buffer *pb, unsigned usage);
};
static inline struct nouveau_screen *
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index b595405357f..e844f6abb3d 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -3,41 +3,95 @@
#include "util/u_debug.h"
+#ifdef DEBUG
+#define DEBUG_NOUVEAU_STATEOBJ
+#endif /* DEBUG */
+
struct nouveau_stateobj_reloc {
struct nouveau_bo *bo;
- unsigned offset;
- unsigned packet;
+ struct nouveau_grobj *gr;
+ uint32_t push_offset;
+ uint32_t mthd;
- unsigned data;
+ uint32_t data;
unsigned flags;
unsigned vor;
unsigned tor;
};
+struct nouveau_stateobj_start {
+ struct nouveau_grobj *gr;
+ uint32_t mthd;
+ uint32_t size;
+ unsigned offset;
+};
+
struct nouveau_stateobj {
struct pipe_reference reference;
- unsigned *push;
+ struct nouveau_stateobj_start *start;
struct nouveau_stateobj_reloc *reloc;
- unsigned *cur;
- unsigned cur_packet;
+ /* Common memory pool for data. */
+ uint32_t *pool;
+ unsigned pool_cur;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ unsigned start_alloc;
+ unsigned reloc_alloc;
+ unsigned pool_alloc;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ unsigned total; /* includes begin_ring */
+ unsigned cur; /* excludes begin_ring, offset from "cur_start" */
+ unsigned cur_start;
unsigned cur_reloc;
};
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+ unsigned i, nr, total = 0;
+
+ for (i = 0; i < so->cur_start; i++) {
+ if (so->start[i].gr->subc > -1)
+ debug_printf("+0x%04x: 0x%08x\n", total++,
+ (so->start[i].size << 18) | (so->start[i].gr->subc << 13)
+ | so->start[i].mthd);
+ else
+ debug_printf("+0x%04x: 0x%08x\n", total++,
+ (so->start[i].size << 18) | so->start[i].mthd);
+ for (nr = 0; nr < so->start[i].size; nr++, total++)
+ debug_printf("+0x%04x: 0x%08x\n", total,
+ so->pool[so->start[i].offset + nr]);
+ }
+}
+
static INLINE struct nouveau_stateobj *
-so_new(unsigned push, unsigned reloc)
+so_new(unsigned start, unsigned push, unsigned reloc)
{
struct nouveau_stateobj *so;
so = MALLOC(sizeof(struct nouveau_stateobj));
pipe_reference_init(&so->reference, 1);
- so->push = MALLOC(sizeof(unsigned) * push);
- so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+ so->total = so->cur = so->cur_start = so->cur_reloc = 0;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ so->start_alloc = start;
+ so->reloc_alloc = reloc;
+ so->pool_alloc = push;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
- so->cur = so->push;
- so->cur_reloc = so->cur_packet = 0;
+ so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start));
+ so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc));
+ so->pool = MALLOC(push * sizeof(uint32_t));
+ so->pool_cur = 0;
+
+ if (!so->start || !so->reloc || !so->pool) {
+ debug_printf("malloc failed\n");
+ assert(0);
+ }
return so;
}
@@ -48,62 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
struct nouveau_stateobj *so = *pso;
int i;
- if (pipe_reference((struct pipe_reference**)pso, &ref->reference)) {
- free(so->push);
+ if (pipe_reference(&(*pso)->reference, &ref->reference)) {
+ FREE(so->start);
for (i = 0; i < so->cur_reloc; i++)
nouveau_bo_ref(NULL, &so->reloc[i].bo);
- free(so->reloc);
- free(so);
+ FREE(so->reloc);
+ FREE(so->pool);
+ FREE(so);
}
+ *pso = ref;
}
static INLINE void
-so_data(struct nouveau_stateobj *so, unsigned data)
+so_data(struct nouveau_stateobj *so, uint32_t data)
{
- (*so->cur++) = (data);
- so->cur_packet += 4;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->cur >= so->start[so->cur_start - 1].size) {
+ debug_printf("exceeding specified size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data;
}
static INLINE void
-so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size)
{
- so->cur_packet += (4 * size);
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if ((so->cur + size) > so->start[so->cur_start - 1].size) {
+ debug_printf("exceeding specified size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
while (size--)
- (*so->cur++) = (*data++);
+ so->pool[so->start[so->cur_start - 1].offset + so->cur++] =
+ *data++;
}
static INLINE void
so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
unsigned mthd, unsigned size)
{
- so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
- so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+ struct nouveau_stateobj_start *start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->start_alloc <= so->cur_start) {
+ debug_printf("exceeding num_start size\n");
+ assert(0);
+ } else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+ start = so->start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
+ debug_printf("previous so_method was not filled\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ so->start = start;
+ start[so->cur_start].gr = gr;
+ start[so->cur_start].mthd = mthd;
+ start[so->cur_start].size = size;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->pool_alloc < (size + so->pool_cur)) {
+ debug_printf("exceeding num_pool size\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ start[so->cur_start].offset = so->pool_cur;
+ so->pool_cur += size;
+
+ so->cur_start++;
+ /* The 1 is for *this* begin_ring. */
+ so->total += so->cur + 1;
+ so->cur = 0;
}
static INLINE void
so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
unsigned data, unsigned flags, unsigned vor, unsigned tor)
{
- struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
-
- r->bo = NULL;
- nouveau_bo_ref(bo, &r->bo);
- r->offset = so->cur - so->push;
- r->packet = so->cur_packet;
- r->data = data;
- r->flags = flags;
- r->vor = vor;
- r->tor = tor;
+ struct nouveau_stateobj_reloc *r;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->reloc_alloc <= so->cur_reloc) {
+ debug_printf("exceeding num_reloc size\n");
+ assert(0);
+ } else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+ r = so->reloc;
+
+ so->reloc = r;
+ r[so->cur_reloc].bo = NULL;
+ nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
+ r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
+ r[so->cur_reloc].push_offset = so->total + so->cur;
+ r[so->cur_reloc].data = data;
+ r[so->cur_reloc].flags = flags;
+ r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd +
+ (so->cur << 2);
+ r[so->cur_reloc].vor = vor;
+ r[so->cur_reloc].tor = tor;
+
so_data(so, data);
+ so->cur_reloc++;
}
-static INLINE void
-so_dump(struct nouveau_stateobj *so)
+/* Determine if this buffer object is referenced by this state object. */
+static INLINE boolean
+so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo)
{
- unsigned i, nr = so->cur - so->push;
+ int i;
+
+ for (i = 0; i < so->cur_reloc; i++)
+ if (so->reloc[i].bo == bo)
+ return true;
- for (i = 0; i < nr; i++)
- debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
+ return false;
}
static INLINE void
@@ -111,48 +231,95 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
{
struct nouveau_pushbuf *pb = chan->pushbuf;
unsigned nr, i;
+ int ret = 0;
- nr = so->cur - so->push;
- if (pb->remaining < nr)
- nouveau_pushbuf_flush(chan, nr);
- pb->remaining -= nr;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (so->start[so->cur_start - 1].size > so->cur) {
+ debug_printf("emit: previous so_method was not filled\n");
+ assert(0);
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ /* We cannot update total in case we so_emit again. */
+ nr = so->total + so->cur;
+
+ /* This will flush if we need space.
+ * We don't actually need the marker.
+ */
+ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
+ debug_printf("so_emit failed marker emit with error %d\n", ret);
+ assert(0);
+ }
+
+ /* Submit data. This will ensure proper binding of objects. */
+ for (i = 0; i < so->cur_start; i++) {
+ BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size);
+ OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size);
+ }
- memcpy(pb->cur, so->push, nr * 4);
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
- r->bo, r->data, 0, r->flags,
- r->vor, r->tor);
+ if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr +
+ r->push_offset, r->bo, r->data,
+ 0, r->flags, r->vor, r->tor))) {
+ debug_printf("so_emit failed reloc with error %d\n", ret);
+ assert(0);
+ }
}
- pb->cur += nr;
}
static INLINE void
so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
{
struct nouveau_pushbuf *pb = chan->pushbuf;
+ struct nouveau_grobj *gr = NULL;
unsigned i;
+ int ret = 0;
if (!so)
return;
- i = so->cur_reloc << 1;
- if (pb->remaining < i)
- nouveau_pushbuf_flush(chan, i);
- pb->remaining -= i;
-
+ /* If we need to flush in flush notify, then we have a problem anyway. */
for (i = 0; i < so->cur_reloc; i++) {
struct nouveau_stateobj_reloc *r = &so->reloc[i];
- nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0,
- (r->flags & (NOUVEAU_BO_VRAM |
- NOUVEAU_BO_GART |
- NOUVEAU_BO_RDWR)) |
- NOUVEAU_BO_DUMMY, 0, 0);
- nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0,
- r->flags | NOUVEAU_BO_DUMMY,
- r->vor, r->tor);
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+ if (r->mthd & 0x40000000) {
+ debug_printf("error: NI mthd 0x%08X\n", r->mthd);
+ continue;
+ }
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+ /* The object needs to be bound and the system must know the
+ * subchannel is being used. Otherwise it will discard it.
+ */
+ if (gr != r->gr) {
+ BEGIN_RING(chan, r->gr, 0x100, 1);
+ OUT_RING(chan, 0);
+ gr = r->gr;
+ }
+
+ /* Some relocs really don't like to be hammered,
+ * NOUVEAU_BO_DUMMY makes sure it only
+ * happens when needed.
+ */
+ ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) |
+ r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART
+ | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0);
+ if (ret) {
+ debug_printf("OUT_RELOC failed %d\n", ret);
+ assert(0);
+ }
+
+ ret = OUT_RELOC(chan, r->bo, r->data, r->flags |
+ NOUVEAU_BO_DUMMY, r->vor, r->tor);
+ if (ret) {
+ debug_printf("OUT_RELOC failed %d\n", ret);
+ assert(0);
+ }
+
+ pb->remaining -= 2;
}
}
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 42c77e5e778..4c3e08a43f5 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -23,6 +23,9 @@
#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17)
#define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
+/* use along with GPU_WRITE for 2D-only writes */
+#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)
+
extern struct pipe_screen *
nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 17166c9f51d..edd96859cf8 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv04_context *nv04 = nv04_context(pipe);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv04->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -27,67 +31,42 @@ nv04_destroy(struct pipe_context *pipe)
FREE(nv04);
}
-static void
-nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
static boolean
nv04_init_hwctx(struct nv04_context *nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
// requires a valid handle
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
// OUT_RING(0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
- OUT_RING(0);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
+ OUT_RING(chan, 0);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(0x40182800);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, 0x40182800);
// OUT_RING(1<<20/*no cull*/);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
// OUT_RING(0x24|(1<<6)|(1<<8));
- OUT_RING(0x120001a4);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
- OUT_RING(0x332213a1);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
- OUT_RING(0x11001010);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
- OUT_RING(0x0);
-// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+ OUT_RING(chan, 0x120001a4);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
+ OUT_RING(chan, 0x332213a1);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
+ OUT_RING(chan, 0x11001010);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
+ OUT_RING(chan, 0x0);
+// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
// OUT_RING(SCREEN_OFFSET);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
- OUT_RING(0xff000000);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+ OUT_RING(chan, 0xff000000);
- FIRE_RING (NULL);
+ FIRE_RING (chan);
return TRUE;
}
-static unsigned int
-nv04_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv04_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-
struct pipe_context *
nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -107,14 +86,13 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv04->pipe.winsys = ws;
nv04->pipe.screen = pscreen;
nv04->pipe.destroy = nv04_destroy;
- nv04->pipe.set_edgeflags = nv04_set_edgeflags;
nv04->pipe.draw_arrays = nv04_draw_arrays;
nv04->pipe.draw_elements = nv04_draw_elements;
nv04->pipe.clear = nv04_clear;
nv04->pipe.flush = nv04_flush;
- nv04->pipe.is_texture_referenced = nv04_is_texture_referenced;
- nv04->pipe.is_buffer_referenced = nv04_is_buffer_referenced;
+ nv04->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv04->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
nv04_init_surface_functions(nv04);
nv04_init_state_functions(nv04);
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 2842b2c90db..fe3b527423c 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -13,10 +13,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv04_screen *ctx = nv04->screen
-#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_context.h"
#include "nv04_state.h"
@@ -140,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04);
extern void nv04_state_tex_update(struct nv04_context *nv04);
/* nv04_vbo.c */
-extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv04_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv04_draw_elements( struct pipe_context *pipe,
+extern void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 21f990fd536..c152b52119a 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -4,7 +4,7 @@
#define _(m,tf) \
{ \
PIPE_FORMAT_##m, \
- NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
+ NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \
}
struct nv04_texture_format {
@@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
return;
}
- nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+ nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
| nv04_fragtex_format(pt->format)
- | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
- | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
- | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
- | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+ | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+ | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+ | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+ | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
;
}
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
index 93f752faec9..e0a6948aeb4 100644
--- a/src/gallium/drivers/nv04/nv04_miptree.c
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -1,6 +1,7 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_math.h"
#include "nv04_context.h"
#include "nv04_screen.h"
@@ -9,31 +10,22 @@ static void
nv04_miptree_layout(struct nv04_miptree *nv04mt)
{
struct pipe_texture *pt = &nv04mt->base;
- uint width = pt->width[0], height = pt->height[0];
uint offset = 0;
int nr_faces, l;
nr_faces = 1;
for (l = 0; l <= pt->last_level; l++) {
- pt->width[l] = width;
- pt->height[l] = height;
-
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
- nv04mt->level[l].pitch = pt->width[0];
+ nv04mt->level[l].pitch = pt->width0;
nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
-
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
}
for (l = 0; l <= pt->last_level; l++) {
-
nv04mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));
- offset += nv04mt->level[l].pitch * pt->height[l];
+ /* XXX guess was obviously missing */
+ nv04mt->level[l].image_offset[0] = offset;
+ offset += nv04mt->level[l].pitch * u_minify(pt->height0, l);
}
nv04mt->total_size = offset;
@@ -63,7 +55,7 @@ nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
FREE(mt);
return NULL;
}
-
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -75,7 +67,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv04_miptree);
@@ -89,6 +81,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
pipe_buffer_reference(&mt->buffer, pb);
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -120,8 +113,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&ns->base.texture, pt);
ns->base.format = pt->format;
- ns->base.width = pt->width[level];
- ns->base.height = pt->height[level];
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
ns->base.usage = flags;
pipe_reference_init(&ns->base.reference, 1);
ns->base.face = face;
@@ -129,7 +122,7 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.zslice = zslice;
ns->pitch = nv04mt->level[level].pitch;
- ns->base.offset = nv04mt->level[level].image_offset;
+ ns->base.offset = nv04mt->level[level].image_offset[0];
return &ns->base;
}
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
index f6458232ae5..0b795ea2430 100644
--- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render,
static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v4,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v5,8);
- OUT_RING(0xFEDCBA);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8);
+ OUT_RING(chan, 0xFEDCBA);
}
static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RING(0xFED);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RING(chan, 0xFED);
}
static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
{
- BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
- OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
- OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
- OUT_RING(0xFECEDC);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+ OUT_RING(chan, 0xFECEDC);
}
static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
@@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
{
const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
unsigned char* buffer = render->buffer;
- struct nv04_context* nv04 = render->nv04;
+ struct nv04_context *nv04 = render->nv04;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
int i,j;
for(i = 0; i<nr_indices; i+=14)
@@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
if (numvert<3)
break;
- BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8);
for(j = 0; j<numvert; j++)
- OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 );
- BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+ BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
for(j = 0; j<numtri/2; j++ )
- OUT_RING(striptbl[j]);
+ OUT_RING(chan, striptbl[j]);
if (numtri%2)
- OUT_RING(striptbl[numtri/2]&0xFFF);
+ OUT_RING(chan, striptbl[numtri/2]&0xFFF);
}
}
@@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
{
const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
unsigned char* buffer = render->buffer;
- struct nv04_context* nv04 = render->nv04;
+ struct nv04_context *nv04 = render->nv04;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
int i,j;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
- OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8);
for(i = 1; i<nr_indices; i+=14)
{
@@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
if (numvert < 3)
break;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
for(j=0;j<numvert;j++)
- OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+ OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 );
- BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+ BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
for(j = 0; j<numtri/2; j++)
- OUT_RING(fantbl[j]);
+ OUT_RING(chan, fantbl[j]);
if (numtri%2)
- OUT_RING(fantbl[numtri/2]&0xFFF);
+ OUT_RING(chan, fantbl[numtri/2]&0xFFF);
}
}
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index 170ce3eb7e5..7c5b6e8229a 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen)
nouveau_grobj_free(&screen->fahrenheit);
nv04_surface_2d_takedown(&screen->eng2d);
+ nouveau_screen_fini(&screen->base);
+
FREE(pscreen);
}
@@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
fahrenheit_class = 0;
sub3d_class = 0;
} else if (dev->chipset >= 0x10) {
- fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class = NV10_TEXTURED_TRIANGLE;
sub3d_class = NV10_CONTEXT_SURFACES_3D;
} else {
- fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+ fahrenheit_class=NV04_TEXTURED_TRIANGLE;
sub3d_class = NV04_CONTEXT_SURFACES_3D;
}
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index d356ebd8b36..e3dc4c5bf44 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) {
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
break;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
break;
case PIPE_TEX_WRAP_CLAMP:
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
default:
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
- ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+ ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
}
- return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+ return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
}
static void *
@@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe,
ss = MALLOC(sizeof(struct nv04_sampler_state));
- ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
- (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+ ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+ (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
if (cso->max_anisotropy > 1.0) {
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
}
switch (cso->mag_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
break;
case PIPE_TEX_FILTER_NEAREST:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
break;
}
@@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
case PIPE_TEX_FILTER_LINEAR:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
break;
}
break;
@@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
default:
switch (cso->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NEAREST:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
break;
case PIPE_TEX_MIPFILTER_NONE:
default:
- filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+ filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
break;
}
break;
@@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe,
*/
rs = MALLOC(sizeof(struct nv04_rasterizer_state));
- rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+ rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
return (void *)rs;
}
@@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
hw->control = float_to_ubyte(cso->alpha.ref_value);
- hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
- hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
- hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
- hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
- hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
- hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
- hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+ hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+ hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+ hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0;
+ hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+ hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+ hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0;
+ hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
return (void *)hw;
}
@@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe,
/* struct nv04_context *nv04 = nv04_context(pipe);
// XXX
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+ BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
}
@@ -425,9 +425,9 @@ nv04_init_state_functions(struct nv04_context *nv04)
nv04->pipe.delete_blend_state = nv04_blend_state_delete;
nv04->pipe.create_sampler_state = nv04_sampler_state_create;
- nv04->pipe.bind_sampler_states = nv04_sampler_state_bind;
+ nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind;
nv04->pipe.delete_sampler_state = nv04_sampler_state_delete;
- nv04->pipe.set_sampler_textures = nv04_set_sampler_texture;
+ nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture;
nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create;
nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h
index 399f750dbe7..81d1d2ebaa9 100644
--- a/src/gallium/drivers/nv04/nv04_state.h
+++ b/src/gallium/drivers/nv04/nv04_state.h
@@ -31,6 +31,7 @@ struct nv04_rasterizer_state {
struct nv04_miptree {
struct pipe_texture base;
+ struct nouveau_bo *bo;
struct pipe_buffer *buffer;
uint total_size;
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
index eb2c1c57c67..b8d6dc560f0 100644
--- a/src/gallium/drivers/nv04/nv04_state_emit.c
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f)
static void nv04_emit_control(struct nv04_context* nv04)
{
uint32_t control = nv04->dsa->control;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(control);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, control);
}
static void nv04_emit_blend(struct nv04_context* nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
uint32_t blend;
blend=0x4; // texture MODULATE_ALPHA
@@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04)
blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
- OUT_RING(blend);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
+ OUT_RING(chan, blend);
}
static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
{
struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
struct pipe_texture *pt = &nv04mt->base;
-
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING(nv04->sampler[unit]->filter);
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+ struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING(chan, nv04->sampler[unit]->filter);
}
static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
@@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
uint32_t rt_format, w, h;
int colour_format = 0, zeta_format = 0;
struct nv04_miptree *nv04mt = 0;
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
+ struct nouveau_bo *bo;
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
@@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
assert(0);
}
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
- OUT_RING(rt_format);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+ OUT_RING(chan, rt_format);
nv04mt = (struct nv04_miptree *)rt->base.texture;
+ bo = nouveau_bo(nv04mt->buffer);
/* FIXME pitches have to be aligned ! */
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
- OUT_RING(rt->pitch|(zeta->pitch<<16));
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(chan, rt->pitch|(zeta->pitch<<16));
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (fb->zsbuf) {
nv04mt = (struct nv04_miptree *)zeta->base.texture;
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
}
void
nv04_emit_hw_state(struct nv04_context *nv04)
{
+ struct nv04_screen *screen = nv04->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+ struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
int i;
if (nv04->dirty & NV04_NEW_VERTPROG) {
@@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (nv04->dirty & NV04_NEW_CONTROL) {
nv04->dirty &= ~NV04_NEW_CONTROL;
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
- OUT_RING(nv04->dsa->control);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+ OUT_RING(chan, nv04->dsa->control);
}
if (nv04->dirty & NV04_NEW_BLEND) {
@@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04)
unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch;
unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch;
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
- OUT_RING(rt_pitch|(zeta_pitch<<16));
- OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+ OUT_RING(chan, rt_pitch|(zeta_pitch<<16));
+ OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv04->zeta) {
- BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
- OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
}
/* Texture images */
@@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04)
if (!(nv04->fp_samplers & (1 << i)))
continue;
struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
- BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
- OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+ BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
}
}
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index b2ab50ee21f..b24a9cee5ae 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -1,5 +1,6 @@
#include "pipe/p_context.h"
#include "pipe/p_format.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -13,10 +14,13 @@ nv04_surface_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
case PIPE_FORMAT_R16_SNORM:
case PIPE_FORMAT_R5G6B5_UNORM:
case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -36,8 +40,10 @@ nv04_rect_format(enum pipe_format format)
case PIPE_FORMAT_A8_UNORM:
return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
case PIPE_FORMAT_Z16_UNORM:
return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
@@ -51,6 +57,10 @@ static INLINE int
nv04_scaled_image_format(enum pipe_format format)
{
switch (format) {
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
case PIPE_FORMAT_A1R5G5B5_UNORM:
return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -59,6 +69,7 @@ nv04_scaled_image_format(enum pipe_format format)
return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
case PIPE_FORMAT_R5G6B5_UNORM:
case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
default:
return -1;
@@ -66,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format)
}
static INLINE unsigned
-nv04_swizzle_bits(unsigned x, unsigned y)
+nv04_swizzle_bits_square(unsigned x, unsigned y)
{
unsigned u = (x & 0x001) << 0 |
(x & 0x002) << 1 |
@@ -96,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y)
return v | u;
}
+/* rectangular swizzled textures are linear concatenations of swizzled square tiles */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+ unsigned s = MIN2(w, h);
+ unsigned m = s - 1;
+ return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+
static int
nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
struct pipe_surface *dst, int dx, int dy,
@@ -123,6 +143,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
assert(sub_w == w || util_is_pot(sub_w));
assert(sub_h == h || util_is_pot(sub_h));
+ MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+ ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
OUT_RELOCo(chan, dst_bo,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -131,7 +154,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
OUT_RING (chan, nv04_surface_format(dst->format) |
log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
-
+
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
OUT_RELOCo(chan, src_bo,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -144,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
for (x = 0; x < w; x += sub_w) {
sub_w = MIN2(sub_w, w - x);
- /* Must be 64-byte aligned */
- assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63));
+ assert(!(dst->offset & 63));
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size,
+ OUT_RELOCl(chan, dst_bo, dst->offset,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
OUT_RING (chan, nv04_scaled_image_format(src->format));
OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
- OUT_RING (chan, 0);
+ OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
OUT_RING (chan, 1 << 20);
OUT_RING (chan, 1 << 20);
@@ -167,7 +189,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
OUT_RING (chan, src_pitch |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size,
+ OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format),
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RING (chan, 0);
}
@@ -188,11 +210,11 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
unsigned dst_offset = dst->offset + dy * dst_pitch +
- dx * dst->texture->block.size;
+ dx * util_format_get_blocksize(dst->texture->format);
unsigned src_offset = src->offset + sy * src_pitch +
- sx * src->texture->block.size;
+ sx * util_format_get_blocksize(src->texture->format);
- WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+ MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
OUT_RELOCo(chan, src_bo,
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -209,7 +231,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
OUT_RING (chan, src_pitch);
OUT_RING (chan, dst_pitch);
- OUT_RING (chan, w * src->texture->block.size);
+ OUT_RING (chan, w * util_format_get_blocksize(src->texture->format));
OUT_RING (chan, count);
OUT_RING (chan, 0x0101);
OUT_RING (chan, 0);
@@ -240,7 +262,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
if (format < 0)
return 1;
- WAIT_RING (chan, 12);
+ MARK_RING (chan, 12, 4);
BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -305,7 +327,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
gdirect_format = nv04_rect_format(dst->format);
assert(gdirect_format >= 0);
- WAIT_RING (chan, 16);
+ MARK_RING (chan, 16, 4);
BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -477,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen)
ctx->fill = nv04_surface_fill;
return ctx;
}
+
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+ int temp_flags;
+
+ // printf("creating temp, flags is %i!\n", flags);
+
+ if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD)
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD;
+ }
+ else
+ {
+ temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
+ }
+
+ struct nv40_screen* screen = (struct nv40_screen*)pscreen;
+ ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+
+ struct pipe_texture templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = ns->base.texture->format;
+ templ.target = PIPE_TEXTURE_2D;
+ templ.width0 = ns->base.width;
+ templ.height0 = ns->base.height;
+ templ.depth0 = 1;
+ templ.last_level = 0;
+
+ // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+ templ.nr_samples = ns->base.texture->nr_samples;
+
+ templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+ struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ);
+ struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+ temp_ns->backing = ns;
+
+ if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ)
+ eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height);
+
+ return temp_ns;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h
index 02b3f56ba8b..ce696a11a39 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.h
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.h
@@ -4,6 +4,7 @@
struct nv04_surface {
struct pipe_surface base;
unsigned pitch;
+ struct nv04_surface* backing;
};
struct nv04_surface_2d {
@@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen);
void
nv04_surface_2d_takedown(struct nv04_surface_2d **);
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
+
#endif
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 6618660743d..2dd2e146a8f 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -1,7 +1,9 @@
#include <pipe/p_state.h>
#include <pipe/p_defines.h>
#include <pipe/p_inlines.h>
+#include <util/u_format.h>
#include <util/u_memory.h>
+#include <util/u_math.h>
#include <nouveau/nouveau_winsys.h>
#include "nv04_context.h"
#include "nv04_screen.h"
@@ -10,22 +12,19 @@
struct nv04_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
- bool direct;
+ boolean direct;
};
static void
-nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width[0] = pt->width[level];
- template->height[0] = pt->height[level];
- template->depth[0] = 1;
- template->block = pt->block;
- template->nblocksx[0] = pt->nblocksx[level];
- template->nblocksy[0] = pt->nblocksx[level];
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -48,14 +47,10 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
@@ -76,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv04_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -85,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -110,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -135,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -156,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * ptx->block.size;
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
index e3167814f2b..34847718145 100644
--- a/src/gallium/drivers/nv04/nv04_vbo.c
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv04_draw_elements( struct pipe_context *pipe,
+void nv04_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv04->constbuf[PIPE_SHADER_VERTEX],
nv04->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv04_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv04_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
printf("coucou in draw arrays\n");
- return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv04_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index a127b134ecd..1ecb73d06e8 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv10_context *nv10 = nv10_context(pipe);
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv10->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -31,253 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
{
struct nv10_screen *screen = nv10->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int i;
float projectionmatrix[16];
- BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
- OUT_RING (screen->sync->handle);
- BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->gart->handle);
- BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->vram->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->gart->handle);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
- OUT_RING (0);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
- OUT_RING ((0x7ff<<16)|0x800);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
- OUT_RING ((0x7ff<<16)|0x800);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING (chan, (0x7ff<<16)|0x800);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING (chan, (0x7ff<<16)|0x800);
for (i=1;i<8;i++) {
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, 0x290, 1);
- OUT_RING ((0x10<<16)|1);
- BEGIN_RING(celsius, 0x3f4, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, 0x290, 1);
+ OUT_RING (chan, (0x10<<16)|1);
+ BEGIN_RING(chan, celsius, 0x3f4, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
if (nv10->screen->celsius->grclass != NV10TCL) {
/* For nv11, nv17 */
- BEGIN_RING(celsius, 0x120, 3);
- OUT_RING (0);
- OUT_RING (1);
- OUT_RING (2);
+ BEGIN_RING(chan, celsius, 0x120, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 2);
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
/* Set state */
- BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
- OUT_RING (0x207);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
- OUT_RING (0);
- OUT_RING (0);
-
- BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
- OUT_RING (0x30141010);
- OUT_RING (0);
- OUT_RING (0x20040000);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0x00000c00);
- OUT_RING (0x18000000);
- OUT_RING (0x300e0300);
- OUT_RING (0x0c091c80);
-
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
- OUT_RING (1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
- OUT_RING (1);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0x8006);
- BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
- OUT_RING (0xff);
- OUT_RING (0x207);
- OUT_RING (0);
- OUT_RING (0xff);
- OUT_RING (0x1e00);
- OUT_RING (0x1e00);
- OUT_RING (0x1e00);
- OUT_RING (0x1d01);
- BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
- OUT_RING (0x201);
- BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
- OUT_RING (8);
- BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
- OUT_RING (8);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (0x1b02);
- OUT_RING (0x1b02);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
- OUT_RING (0x405);
- OUT_RING (0x901);
- BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (chan, 0x207);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+ OUT_RING (chan, 0x30141010);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x20040000);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0x18000000);
+ OUT_RING (chan, 0x300e0300);
+ OUT_RING (chan, 0x0c091c80);
+
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x8006);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, 0x207);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1e00);
+ OUT_RING (chan, 0x1d01);
+ BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, 0x201);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, 0x1b02);
+ OUT_RING (chan, 0x1b02);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (chan, 0x405);
+ OUT_RING (chan, 0x901);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8);
for (i=0;i<8;i++) {
- OUT_RING (0);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
- OUT_RING (0x3fc00000); /* -1.50 */
- OUT_RING (0xbdb8aa0a); /* -0.09 */
- OUT_RING (0); /* 0.00 */
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RING (chan, 0x3fc00000); /* -1.50 */
+ OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */
+ OUT_RING (chan, 0); /* 0.00 */
- BEGIN_RING(celsius, NV10TCL_NOP, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
- OUT_RING (0x802);
- OUT_RING (2);
+ BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2);
+ OUT_RING (chan, 0x802);
+ OUT_RING (chan, 2);
/* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
* using texturing, except when using the texture matrix
*/
- BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
- OUT_RING (6);
- BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
- OUT_RING (0x01010101);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+ OUT_RING (chan, 6);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (chan, 0x01010101);
/* Set vertex component */
- BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
- OUT_RINGf (0.0);
- BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1);
+ OUT_RINGf (chan, 0.0);
+ BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, 1);
memset(projectionmatrix, 0, sizeof(projectionmatrix));
- BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+ BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
projectionmatrix[0*4+0] = 1.0;
projectionmatrix[1*4+1] = 1.0;
projectionmatrix[2*4+2] = 1.0;
projectionmatrix[3*4+3] = 1.0;
for (i=0;i<16;i++) {
- OUT_RINGf (projectionmatrix[i]);
+ OUT_RINGf (chan, projectionmatrix[i]);
}
- BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
- OUT_RING (0.0);
- OUT_RINGf (16777216.0);
-
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4);
- OUT_RINGf (-2048.0);
- OUT_RINGf (-2048.0);
- OUT_RINGf (16777215.0 * 0.5);
- OUT_RING (0);
-
- FIRE_RING (NULL);
-}
-
-static void
-nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
-static unsigned int
-nv10_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RING (chan, 0.0);
+ OUT_RINGf (chan, 16777216.0);
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv10_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, -2048.0);
+ OUT_RINGf (chan, -2048.0);
+ OUT_RINGf (chan, 16777215.0 * 0.5);
+ OUT_RING (chan, 0);
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ FIRE_RING (chan);
}
struct pipe_context *
@@ -299,14 +276,13 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv10->pipe.winsys = ws;
nv10->pipe.screen = pscreen;
nv10->pipe.destroy = nv10_destroy;
- nv10->pipe.set_edgeflags = nv10_set_edgeflags;
nv10->pipe.draw_arrays = nv10_draw_arrays;
nv10->pipe.draw_elements = nv10_draw_elements;
nv10->pipe.clear = nv10_clear;
nv10->pipe.flush = nv10_flush;
- nv10->pipe.is_texture_referenced = nv10_is_texture_referenced;
- nv10->pipe.is_buffer_referenced = nv10_is_buffer_referenced;
+ nv10->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv10->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
nv10_init_surface_functions(nv10);
nv10_init_state_functions(nv10);
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index f1e003c9537..ab4b825487d 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -13,10 +13,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv10_screen *ctx = nv10->screen
-#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_context.h"
#include "nv10_state.h"
@@ -143,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10);
extern void nv10_state_tex_update(struct nv10_context *nv10);
/* nv10_vbo.c */
-extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv10_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv10_draw_elements( struct pipe_context *pipe,
+extern void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
index 27f2f875847..c1f7ccb9ab6 100644
--- a/src/gallium/drivers/nv10/nv10_fragtex.c
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
struct pipe_texture *pt = &nv10mt->base;
struct nv10_texture_format *tf;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
uint32_t txf, txs, txp;
tf = nv10_fragtex_format(pt->format);
@@ -62,9 +65,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
txf = tf->format << 8;
txf |= (pt->last_level + 1) << 16;
- txf |= log2i(pt->width[0]) << 20;
- txf |= log2i(pt->height[0]) << 24;
- txf |= log2i(pt->depth[0]) << 28;
+ txf |= log2i(pt->width0) << 20;
+ txf |= log2i(pt->height0) << 24;
+ txf |= log2i(pt->depth0) << 28;
txf |= 8;
switch (pt->target) {
@@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
return;
}
- BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
- OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING (ps->wrap);
- OUT_RING (0x40000000); /* enable */
- OUT_RING (txs);
- OUT_RING (ps->filt | 0x2000 /* magic */);
- OUT_RING ((pt->width[0] << 16) | pt->height[0]);
- OUT_RING (ps->bcol);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (chan, ps->wrap);
+ OUT_RING (chan, 0x40000000); /* enable */
+ OUT_RING (chan, txs);
+ OUT_RING (chan, ps->filt | 0x2000 /* magic */);
+ OUT_RING (chan, (pt->width0 << 16) | pt->height0);
+ OUT_RING (chan, ps->bcol);
#endif
}
@@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10)
{
#if 0
struct nv10_fragment_program *fp = nv10->fragprog.active;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
unsigned samplers, unit;
samplers = nv10->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (chan, 0);
}
samplers = nv10->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index 34e3c2ebd77..908482ad854 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -1,6 +1,8 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
#include "nv10_context.h"
#include "nv10_screen.h"
@@ -10,7 +12,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
{
struct pipe_texture *pt = &nv10mt->base;
boolean swizzled = FALSE;
- uint width = pt->width[0], height = pt->height[0];
+ uint width = pt->width0;
uint offset = 0;
int nr_faces, l, f;
@@ -21,29 +23,23 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
}
for (l = 0; l <= pt->last_level; l++) {
- pt->width[l] = width;
- pt->height[l] = height;
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
if (swizzled)
- nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+ nv10mt->level[l].pitch = util_format_get_stride(pt->format, width);
else
- nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+ nv10mt->level[l].pitch = util_format_get_stride(pt->format, pt->width0);
nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
nv10mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
+ width = u_minify(width, 1);
}
for (f = 0; f < nr_faces; f++) {
for (l = 0; l <= pt->last_level; l++) {
nv10mt->level[l].image_offset[f] = offset;
- offset += nv10mt->level[l].pitch * pt->height[l];
+ offset += nv10mt->level[l].pitch * u_minify(pt->height0, l);
}
}
@@ -58,7 +54,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv10_miptree);
@@ -72,6 +68,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
pipe_buffer_reference(&mt->buffer, pb);
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -95,6 +92,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
FREE(mt);
return NULL;
}
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -133,8 +131,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&ns->base.texture, pt);
ns->base.format = pt->format;
- ns->base.width = pt->width[level];
- ns->base.height = pt->height[level];
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
ns->base.usage = flags;
pipe_reference_init(&ns->base.reference, 1);
ns->base.face = face;
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
index 1806d5f8ccc..c5dbe43dbc8 100644
--- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -67,12 +67,15 @@ struct nv10_vbuf_render {
void nv10_vtxbuf_bind( struct nv10_context* nv10 )
{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int i;
for(i = 0; i < 8; i++) {
- BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1);
- OUT_RING(0/*nv10->vtxbuf*/);
- BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1);
- OUT_RING(0/*XXX*/);
+ BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(chan, 0/*nv10->vtxbuf*/);
+ BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1);
+ OUT_RING(chan, 0/*XXX*/);
}
}
@@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
{
struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
struct nv10_context *nv10 = nv10_render->nv10;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
int push, i;
nv10_emit_hw_state(nv10);
- BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
- OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING(nv10_render->hwprim);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(chan, nv10_render->hwprim);
if (nr_indices & 1) {
- BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
- OUT_RING (indices[0]);
+ BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, indices[0]);
indices++; nr_indices--;
}
@@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
// XXX too big/small ? check the size
push = MIN2(nr_indices, 1200 * 2);
- BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((indices[i+1] << 16) | indices[i]);
+ OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
nr_indices -= push;
indices += push;
}
- BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (chan, 0);
}
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index ee5901e743e..69a6dab866a 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->celsius);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -177,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->celsius, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
index 9b38219b996..ffc6be3c401 100644
--- a/src/gallium/drivers/nv10/nv10_state.c
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -553,9 +553,9 @@ nv10_init_state_functions(struct nv10_context *nv10)
nv10->pipe.delete_blend_state = nv10_blend_state_delete;
nv10->pipe.create_sampler_state = nv10_sampler_state_create;
- nv10->pipe.bind_sampler_states = nv10_sampler_state_bind;
+ nv10->pipe.bind_fragment_sampler_states = nv10_sampler_state_bind;
nv10->pipe.delete_sampler_state = nv10_sampler_state_delete;
- nv10->pipe.set_sampler_textures = nv10_set_sampler_texture;
+ nv10->pipe.set_fragment_sampler_textures = nv10_set_sampler_texture;
nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create;
nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h
index 3a3fd0d4f4f..2524ac02e29 100644
--- a/src/gallium/drivers/nv10/nv10_state.h
+++ b/src/gallium/drivers/nv10/nv10_state.h
@@ -126,6 +126,7 @@ struct nv10_depth_stencil_alpha_state {
struct nv10_miptree {
struct pipe_texture base;
+ struct nouveau_bo *bo;
struct pipe_buffer *buffer;
uint total_size;
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
index d8691ef9c67..30a596ca604 100644
--- a/src/gallium/drivers/nv10/nv10_state_emit.c
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -4,25 +4,32 @@
static void nv10_state_emit_blend(struct nv10_context* nv10)
{
struct nv10_blend_state *b = nv10->blend;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
- OUT_RING (b->d_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, b->d_enable);
- BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
- OUT_RING (b->b_enable);
- OUT_RING (b->b_srcfunc);
- OUT_RING (b->b_dstfunc);
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+ OUT_RING (chan, b->b_enable);
+ OUT_RING (chan, b->b_srcfunc);
+ OUT_RING (chan, b->b_dstfunc);
- BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
- OUT_RING (b->c_mask);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+ OUT_RING (chan, b->c_mask);
}
static void nv10_state_emit_blend_color(struct nv10_context* nv10)
{
struct pipe_blend_color *c = nv10->blend_color;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
- OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1);
+ OUT_RING (chan,
+ (float_to_ubyte(c->color[3]) << 24)|
(float_to_ubyte(c->color[0]) << 16)|
(float_to_ubyte(c->color[1]) << 8) |
(float_to_ubyte(c->color[2]) << 0));
@@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10)
static void nv10_state_emit_rast(struct nv10_context* nv10)
{
struct nv10_rasterizer_state *r = nv10->rast;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
- OUT_RING (r->shade_model);
- OUT_RING (r->line_width);
+ BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2);
+ OUT_RING (chan, r->shade_model);
+ OUT_RING (chan, r->line_width);
- BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
- OUT_RING (r->point_size);
+ BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+ OUT_RING (chan, r->point_size);
- BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (r->poly_mode_front);
- OUT_RING (r->poly_mode_back);
+ BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, r->poly_mode_front);
+ OUT_RING (chan, r->poly_mode_back);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
- OUT_RING (r->cull_face);
- OUT_RING (r->front_face);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+ OUT_RING (chan, r->cull_face);
+ OUT_RING (chan, r->front_face);
- BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
- OUT_RING (r->line_smooth_en);
- OUT_RING (r->poly_smooth_en);
+ BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (chan, r->line_smooth_en);
+ OUT_RING (chan, r->poly_smooth_en);
- BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (r->cull_face_en);
+ BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, r->cull_face_en);
}
static void nv10_state_emit_dsa(struct nv10_context* nv10)
{
struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
- BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
- OUT_RING (d->depth.func);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, d->depth.func);
- BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (d->depth.write_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, d->depth.write_enable);
- BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (d->depth.test_enable);
+ BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, d->depth.test_enable);
#if 0
- BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
- OUT_RING (d->stencil.enable);
- BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
- OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, d->stencil.enable);
+ BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7);
+ OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
#endif
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (d->alpha.enabled);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, d->alpha.enabled);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
- OUT_RING (d->alpha.func);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (chan, d->alpha.func);
- BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
- OUT_RING (d->alpha.ref);
+ BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (chan, d->alpha.ref);
}
static void nv10_state_emit_viewport(struct nv10_context* nv10)
@@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
int colour_format = 0, zeta_format = 0;
struct nv10_miptree *nv10mt = 0;
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
+
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
colour_format = fb->cbufs[0]->format;
@@ -129,6 +146,9 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
switch (colour_format) {
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ rt_format |= NV10TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
case 0:
rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8;
@@ -141,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
}
if (zeta) {
- BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (zeta->pitch << 16));
+ BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (zeta->pitch << 16));
} else {
- BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (rt->pitch << 16));
+ BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (rt->pitch << 16));
}
nv10mt = (struct nv10_miptree *)rt->base.texture;
@@ -157,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
nv10->zeta = nv10mt->buffer;
}
- BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
- OUT_RING ((w << 16) | 0);
- OUT_RING ((h << 16) | 0);
- OUT_RING (rt_format);
- BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
- OUT_RING (((w - 1) << 16) | 0 | 0x08000800);
- OUT_RING (((h - 1) << 16) | 0 | 0x08000800);
+ BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3);
+ OUT_RING (chan, (w << 16) | 0);
+ OUT_RING (chan, (h << 16) | 0);
+ OUT_RING (chan, rt_format);
+ BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800);
+ OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800);
}
static void nv10_vertex_layout(struct nv10_context *nv10)
@@ -198,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10)
void
nv10_emit_hw_state(struct nv10_context *nv10)
{
+ struct nv10_screen *screen = nv10->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *celsius = screen->celsius;
+ struct nouveau_bo *rt_bo;
int i;
if (nv10->dirty & NV10_NEW_VERTPROG) {
@@ -266,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10)
*/
/* Render target */
+ rt_bo = nouveau_bo(nv10->rt[0]);
// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
-// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
-// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1);
+// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv10->zeta) {
+ struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta);
// XXX
-// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
-// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
- OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1);
+// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* XXX for when we allocate LMA on nv17 */
-/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
- OUT_RELOCl(nv10->zeta + lma_offset);*/
+/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/
}
/* Vertex buffer */
- BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
- OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Texture images */
for (i = 0; i < 2; i++) {
if (!(nv10->fp_samplers & (1 << i)))
continue;
- BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
- OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer);
+ BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
- OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+ BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(chan, bo, nv10->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
NV10TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 8feb85e4bda..eb04af9782e 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -1,7 +1,9 @@
#include <pipe/p_state.h>
#include <pipe/p_defines.h>
#include <pipe/p_inlines.h>
+#include <util/u_format.h>
#include <util/u_memory.h>
+#include <util/u_math.h>
#include <nouveau/nouveau_winsys.h>
#include "nv10_context.h"
#include "nv10_screen.h"
@@ -10,22 +12,19 @@
struct nv10_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
- bool direct;
+ boolean direct;
};
static void
-nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width[0] = pt->width[level];
- template->height[0] = pt->height[level];
- template->depth[0] = 1;
- template->block = pt->block;
- template->nblocksx[0] = pt->nblocksx[level];
- template->nblocksy[0] = pt->nblocksx[level];
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -48,14 +47,10 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
@@ -76,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv10_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -85,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -110,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -135,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx)
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -156,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * ptx->block.size;
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
index 441a4f75f3f..9180c72c9b0 100644
--- a/src/gallium/drivers/nv10/nv10_vbo.c
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv10_draw_elements( struct pipe_context *pipe,
+void nv10_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
}
draw_set_mapped_constant_buffer(draw,
+ PIPE_SHADER_VERTEX,
nv10->constbuf[PIPE_SHADER_VERTEX],
nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
pipe_buffer_unmap(pscreen, indexBuffer);
draw_set_mapped_element_buffer(draw, 0, NULL);
}
-
- return TRUE;
}
-boolean nv10_draw_arrays( struct pipe_context *pipe,
- unsigned prim, unsigned start, unsigned count)
+void nv10_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
{
- return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv10_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index b32d0d83ba0..5b80af2d22a 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv20_context *nv20 = nv20_context(pipe);
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
draw_flush(nv20->draw);
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
@@ -31,377 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
{
struct nv20_screen *screen = nv20->screen;
struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int i;
float projectionmatrix[16];
- const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+ const boolean is_nv25tcl = (kelvin->grclass == NV25TCL);
- BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
- OUT_RING (screen->sync->handle);
- BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->gart->handle); /* TEXTURE1 */
- BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
- OUT_RING (chan->vram->handle);
- OUT_RING (chan->vram->handle); /* ZETA */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle); /* ZETA */
- BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
- OUT_RING (0); /* renouveau: beef0351, unique */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1);
+ OUT_RING (chan, 0); /* renouveau: beef0351, unique */
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
- OUT_RING (0);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
- OUT_RING ((0xfff << 16) | 0x0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
- OUT_RING ((0xfff << 16) | 0x0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+ OUT_RING (chan, (0xfff << 16) | 0x0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+ OUT_RING (chan, (0xfff << 16) | 0x0);
for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, 0x17e0, 3);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
+ BEGIN_RING(chan, kelvin, 0x17e0, 3);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
- OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
} else {
- BEGIN_RING(kelvin, 0x1e68, 1);
- OUT_RING (0x4b800000); /* 16777216.000000 */
- BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
- OUT_RING (NV20TCL_TX_RCOMP_LEQUAL);
+ BEGIN_RING(chan, kelvin, 0x1e68, 1);
+ OUT_RING (chan, 0x4b800000); /* 16777216.000000 */
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+ OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL);
}
- BEGIN_RING(kelvin, 0x290, 1);
- OUT_RING ((0x10 << 16) | 1);
- BEGIN_RING(kelvin, 0x9fc, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, 0x1d80, 1);
- OUT_RING (1);
- BEGIN_RING(kelvin, 0x9f8, 1);
- OUT_RING (4);
- BEGIN_RING(kelvin, 0x17ec, 3);
- OUT_RINGf (0.0);
- OUT_RINGf (1.0);
- OUT_RINGf (0.0);
+ BEGIN_RING(chan, kelvin, 0x290, 1);
+ OUT_RING (chan, (0x10 << 16) | 1);
+ BEGIN_RING(chan, kelvin, 0x9fc, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, 0x1d80, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, kelvin, 0x9f8, 1);
+ OUT_RING (chan, 4);
+ BEGIN_RING(chan, kelvin, 0x17ec, 3);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 1.0);
+ OUT_RINGf (chan, 0.0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d88, 1);
- OUT_RING (3);
+ BEGIN_RING(chan, kelvin, 0x1d88, 1);
+ OUT_RING (chan, 3);
- BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
- OUT_RING (chan->vram->handle);
- BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
- OUT_RING (chan->vram->handle);
+ BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+ OUT_RING (chan, chan->vram->handle);
+ BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+ OUT_RING (chan, chan->vram->handle);
}
- BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
- OUT_RING (0); /* renouveau: beef1e10 */
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1);
+ OUT_RING (chan, 0); /* renouveau: beef1e10 */
- BEGIN_RING(kelvin, 0x1e98, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, 0x1e98, 1);
+ OUT_RING (chan, 0);
#if 0
if (is_nv25tcl) {
- BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
- OUT_RING (NvDmaTT); /* renouveau: beef0202 */
- OUT_RING (NvDmaFB); /* renouveau: beef0201 */
+ BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+ OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */
+ OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */
- BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
- OUT_RING (NvDmaTT); /* renouveau: beef0202 */
+ BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+ OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */
}
#endif
- BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1);
+ OUT_RING (chan, 0);
- BEGIN_RING(kelvin, 0x120, 3);
- OUT_RING (0);
- OUT_RING (1);
- OUT_RING (2);
+ BEGIN_RING(chan, kelvin, 0x120, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 2);
/* error: ILLEGAL_MTHD, PROTECTION_FAULT
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
- OUT_RINGf (0.0);
- OUT_RINGf (512.0);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 512.0);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0);
*/
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x022c, 2);
- OUT_RING (0x280);
- OUT_RING (0x07d28000);
+ BEGIN_RING(chan, kelvin, 0x022c, 2);
+ OUT_RING (chan, 0x280);
+ OUT_RING (chan, 0x07d28000);
}
/* * illegal method, protection fault
- BEGIN_RING(NvSub3D, 0x1c2c, 1);
- OUT_RING (0); */
+ BEGIN_RING(chan, NvSub3D, 0x1c2c, 1);
+ OUT_RING (chan, 0); */
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1da4, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, 0x1da4, 1);
+ OUT_RING (chan, 0);
}
/* * crashes with illegal method, protection fault
- BEGIN_RING(NvSub3D, 0x1c18, 1);
- OUT_RING (0x200); */
+ BEGIN_RING(chan, NvSub3D, 0x1c18, 1);
+ OUT_RING (chan, 0x200); */
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
- OUT_RING ((0 << 16) | 0);
- OUT_RING ((0 << 16) | 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+ OUT_RING (chan, (0 << 16) | 0);
+ OUT_RING (chan, (0 << 16) | 0);
/* *** Set state *** */
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
- OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
- OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+ OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+ OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */
for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
- BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
- OUT_RING (0x30d410d0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
- OUT_RING (0x00011101);
- BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
- OUT_RING (0x130e0300);
- OUT_RING (0x0c091c80);
- BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
- OUT_RING (0x00000c00);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
- OUT_RING (0x20c400c0);
- OUT_RING (0);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
- OUT_RING (0);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
- OUT_RING (0x035125a0);
- OUT_RING (0);
- OUT_RING (0x40002000);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
- OUT_RING (0xffff0000);
-
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
- OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE);
- OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO);
- OUT_RING (0); /* NV20TCL_BLEND_COLOR */
- OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
- OUT_RING (0xff);
- OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
- OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */
- OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */
- OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP);
- OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
- OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP);
-
- BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
- BEGIN_RING(kelvin, 0x17cc, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+ OUT_RING (chan, 0x30d410d0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1);
+ OUT_RING (chan, 0x00011101);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2);
+ OUT_RING (chan, 0x130e0300);
+ OUT_RING (chan, 0x0c091c80);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+ OUT_RING (chan, 0x00000c00);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4);
+ OUT_RING (chan, 0x20c400c0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+ OUT_RING (chan, 0x035125a0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0x40002000);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+ OUT_RING (chan, 0xffff0000);
+
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+ OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE);
+ OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO);
+ OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */
+ OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RING (chan, 0xff);
+ OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+ OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */
+ OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */
+ OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP);
+ OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+ OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+ BEGIN_RING(chan, kelvin, 0x17cc, 1);
+ OUT_RING (chan, 0);
if (is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d84, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, 0x1d84, 1);
+ OUT_RING (chan, 1);
}
- BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
- OUT_RING (0x00020000);
- BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1);
+ OUT_RING (chan, 0x00020000);
+ BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
- OUT_RING(0xffffffff);
+ OUT_RING(chan, 0xffffffff);
}
- BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
- OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
- BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
- OUT_RING (NV20TCL_DEPTH_FUNC_LESS);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
- OUT_RINGf (0.0);
- OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
- BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+ OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (chan, 1);
if (!is_nv25tcl) {
- BEGIN_RING(kelvin, 0x1d84, 1);
- OUT_RING (3);
+ BEGIN_RING(chan, kelvin, 0x1d84, 1);
+ OUT_RING (chan, 3);
}
- BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
if (!is_nv25tcl) {
- OUT_RING (8);
+ OUT_RING (chan, 8);
} else {
- OUT_RINGf (1.0);
+ OUT_RINGf (chan, 1.0);
}
if (!is_nv25tcl) {
- BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */
} else {
- BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, 0x0a1c, 1);
- OUT_RING (0x800);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, 0x0a1c, 1);
+ OUT_RING (chan, 0x800);
}
- BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
- OUT_RING (8);
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL);
- OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
- OUT_RING (NV20TCL_CULL_FACE_BACK);
- OUT_RING (NV20TCL_FRONT_FACE_CCW);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
- OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
- OUT_RING (0);
- BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1);
+ OUT_RING (chan, 8);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL);
+ OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (chan, NV20TCL_CULL_FACE_BACK);
+ OUT_RING (chan, NV20TCL_FRONT_FACE_CCW);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1);
+ OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
- OUT_RING(0);
+ OUT_RING(chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
- OUT_RINGf (1.5);
- OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
- OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
- BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
- OUT_RING (NV20TCL_FOG_MODE_EXP_2);
- OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG);
- BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
- OUT_RING (0);
- OUT_RING (0); /* NV20TCL.FOG_COLOR */
- BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
- OUT_RING (NV20TCL_ENGINE_FIXED);
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+ OUT_RINGf (chan, 1.5);
+ OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */
+ OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2);
+ OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED);
+ OUT_RING (chan, NV20TCL_FOG_COORD_FOG);
+ BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */
+ BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1);
+ OUT_RING (chan, NV20TCL_ENGINE_FIXED);
for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
- BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+ OUT_RING (chan, 0);
}
- BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
- OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
- OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
- OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+ OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0);
+ OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
+ OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
for (i = 4; i < 16; ++i) {
- OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 0.0);
+ OUT_RINGf(chan, 1.0);
}
- BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
- OUT_RING (1);
- BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
- OUT_RING (0x00010101);
- BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (chan, 0x00010101);
+ BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1);
+ OUT_RING (chan, 0);
memset(projectionmatrix, 0, sizeof(projectionmatrix));
projectionmatrix[0*4+0] = 1.0;
projectionmatrix[1*4+1] = 1.0;
projectionmatrix[2*4+2] = 16777215.0;
projectionmatrix[3*4+3] = 1.0;
- BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+ BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
for (i = 0; i < 16; i++) {
- OUT_RINGf (projectionmatrix[i]);
+ OUT_RINGf (chan, projectionmatrix[i]);
}
- BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
- OUT_RINGf (0.0);
- OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
-
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4);
- OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */
- OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */
- OUT_RINGf (0.0);
- OUT_RINGf (16777215.0);
-
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4);
- OUT_RINGf (0.0); /* no effect?, w/2 */
- OUT_RINGf (0.0); /* no effect?, h/2 */
- OUT_RINGf (16777215.0 * 0.5);
- OUT_RINGf (65535.0);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
- FIRE_RING (NULL);
-}
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+ OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */
+ OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */
+ OUT_RINGf (chan, 0.0);
+ OUT_RINGf (chan, 16777215.0);
-static void
-nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
-
-static unsigned int
-nv20_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv20_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
+ OUT_RINGf (chan, 0.0); /* no effect?, w/2 */
+ OUT_RINGf (chan, 0.0); /* no effect?, h/2 */
+ OUT_RINGf (chan, 16777215.0 * 0.5);
+ OUT_RINGf (chan, 65535.0);
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ FIRE_RING (chan);
}
struct pipe_context *
@@ -423,14 +402,13 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv20->pipe.winsys = ws;
nv20->pipe.screen = pscreen;
nv20->pipe.destroy = nv20_destroy;
- nv20->pipe.set_edgeflags = nv20_set_edgeflags;
nv20->pipe.draw_arrays = nv20_draw_arrays;
nv20->pipe.draw_elements = nv20_draw_elements;
nv20->pipe.clear = nv20_clear;
nv20->pipe.flush = nv20_flush;
- nv20->pipe.is_texture_referenced = nv20_is_texture_referenced;
- nv20->pipe.is_buffer_referenced = nv20_is_buffer_referenced;
+ nv20->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv20->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
nv20_init_surface_functions(nv20);
nv20_init_state_functions(nv20);
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index fc932f1f90e..c7dfadaa311 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -13,10 +13,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv20_screen *ctx = nv20->screen
-#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_context.h"
#include "nv20_state.h"
@@ -142,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20);
extern void nv20_state_tex_update(struct nv20_context *nv20);
/* nv20_vbo.c */
-extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv20_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv20_draw_elements( struct pipe_context *pipe,
+extern void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
index 495a7be9127..dedbec73f39 100644
--- a/src/gallium/drivers/nv20/nv20_fragtex.c
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
struct pipe_texture *pt = &nv20mt->base;
struct nv20_texture_format *tf;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
uint32_t txf, txs, txp;
tf = nv20_fragtex_format(pt->format);
@@ -62,9 +65,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
txf = tf->format << 8;
txf |= (pt->last_level + 1) << 16;
- txf |= log2i(pt->width[0]) << 20;
- txf |= log2i(pt->height[0]) << 24;
- txf |= log2i(pt->depth[0]) << 28;
+ txf |= log2i(pt->width0) << 20;
+ txf |= log2i(pt->height0) << 24;
+ txf |= log2i(pt->depth0) << 28;
txf |= 8;
switch (pt->target) {
@@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
return;
}
- BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
- OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
- OUT_RING (ps->wrap);
- OUT_RING (0x40000000); /* enable */
- OUT_RING (txs);
- OUT_RING (ps->filt | 0x2000 /* magic */);
- OUT_RING ((pt->width[0] << 16) | pt->height[0]);
- OUT_RING (ps->bcol);
+ BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8);
+ OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+ OUT_RING (chan, ps->wrap);
+ OUT_RING (chan, 0x40000000); /* enable */
+ OUT_RING (chan, txs);
+ OUT_RING (chan, ps->filt | 0x2000 /* magic */);
+ OUT_RING (chan, (pt->width0 << 16) | pt->height0);
+ OUT_RING (chan, ps->bcol);
#endif
}
@@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20)
{
#if 0
struct nv20_fragment_program *fp = nv20->fragprog.active;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
unsigned samplers, unit;
samplers = nv20->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1);
+ OUT_RING (chan, 0);
}
samplers = nv20->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index 185fbf53e0f..8f7538e7f57 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -1,15 +1,18 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
#include "nv20_context.h"
#include "nv20_screen.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv20_miptree_layout(struct nv20_miptree *nv20mt)
{
struct pipe_texture *pt = &nv20mt->base;
- uint width = pt->width[0], height = pt->height[0];
+ uint width = pt->width0;
uint offset = 0;
int nr_faces, l, f;
uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -25,21 +28,15 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
}
for (l = 0; l <= pt->last_level; l++) {
- pt->width[l] = width;
- pt->height[l] = height;
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
- nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+ nv20mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
else
- nv20mt->level[l].pitch = pt->width[l] * pt->block.size;
+ nv20mt->level[l].pitch = util_format_get_stride(pt->format, width);
nv20mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
+ width = u_minify(width, 1);
}
for (f = 0; f < nr_faces; f++) {
@@ -47,14 +44,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
nv20mt->level[l].image_offset[f] = offset;
if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
- pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
- offset += align(nv20mt->level[l].pitch * pt->height[l], 64);
+ u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+ offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64);
else
- offset += nv20mt->level[l].pitch * pt->height[l];
+ offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
}
nv20mt->level[l].image_offset[f] = offset;
- offset += nv20mt->level[l].pitch * pt->height[l];
+ offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
}
nv20mt->total_size = offset;
@@ -68,7 +65,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv20_miptree);
@@ -82,6 +79,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
pipe_buffer_reference(&mt->buffer, pb);
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -100,8 +98,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
mt->base.screen = screen;
/* Swizzled textures must be POT */
- if (pt->width[0] & (pt->width[0] - 1) ||
- pt->height[0] & (pt->height[0] - 1))
+ if (pt->width0 & (pt->width0 - 1) ||
+ pt->height0 & (pt->height0 - 1))
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
else
if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -130,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv20_miptree_layout(mt);
mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size);
@@ -137,6 +141,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
FREE(mt);
return NULL;
}
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -167,8 +172,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&ns->base.texture, pt);
ns->base.format = pt->format;
- ns->base.width = pt->width[level];
- ns->base.height = pt->height[level];
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
ns->base.usage = flags;
pipe_reference_init(&ns->base.reference, 1);
ns->base.face = face;
@@ -185,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
ns->base.offset = nv20mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv20_miptree_surface_destroy(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv20_miptree_surface_destroy(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
index ddfcdb8057a..2e145672da1 100644
--- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render)
void nv20_vtxbuf_bind( struct nv20_context* nv20 )
{
#if 0
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int i;
for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
- BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
- OUT_RING(0/*nv20->vtxbuf*/);
- BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
- OUT_RING(0/*XXX*/);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+ OUT_RING(chan, 0/*nv20->vtxbuf*/);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1);
+ OUT_RING(chan, 0/*XXX*/);
}
#endif
}
@@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
static unsigned
nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
uint32_t hwfmt = 0;
unsigned fields;
@@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
return 0;
}
- BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
- OUT_RING(hwfmt);
+ BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1);
+ OUT_RING(chan, hwfmt);
return fields;
}
@@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
uint nr_indices)
{
struct nv20_context *nv20 = nv20_render->nv20;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
struct vertex_info *vinfo = &nv20->vertex_info;
unsigned nr_fields;
int max_push;
@@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
nr_fields = nv20__emit_vertex_array_format(nv20);
- BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
- OUT_RING(nv20_render->hwprim);
+ BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(chan, nv20_render->hwprim);
max_push = 1200 / nr_fields;
while (nr_indices) {
int i;
int push = MIN2(nr_indices, max_push);
- BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+ BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
for (i = 0; i < push; i++) {
/* XXX: fixme to handle other than floats? */
int f = nr_fields;
float *attrv = (float*)&data[indices[i] * vsz];
while (f-- > 0)
- OUT_RINGf(*attrv++);
+ OUT_RINGf(chan, *attrv++);
}
nr_indices -= push;
indices += push;
}
- BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
- OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+ BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP);
}
static void
@@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
uint nr_indices)
{
struct nv20_context *nv20 = nv20_render->nv20;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
int push, i;
NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
- BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
- OUT_RELOCl(nv20_render->pbuffer, 0,
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING(nv20_render->hwprim);
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING(chan, nv20_render->hwprim);
if (nr_indices & 1) {
- BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
- OUT_RING (indices[0]);
+ BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, indices[0]);
indices++; nr_indices--;
}
@@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
// XXX too big/small ? check the size
push = MIN2(nr_indices, 1200 * 2);
- BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((indices[i+1] << 16) | indices[i]);
+ OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
nr_indices -= push;
indices += push;
}
- BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+ OUT_RING (chan, 0);
}
static void
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index 4eeacd1afd5..d091335063b 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->kelvin);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -173,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->kelvin, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
index ed4084980f2..3a82e63423d 100644
--- a/src/gallium/drivers/nv20/nv20_state.c
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -546,9 +546,9 @@ nv20_init_state_functions(struct nv20_context *nv20)
nv20->pipe.delete_blend_state = nv20_blend_state_delete;
nv20->pipe.create_sampler_state = nv20_sampler_state_create;
- nv20->pipe.bind_sampler_states = nv20_sampler_state_bind;
+ nv20->pipe.bind_fragment_sampler_states = nv20_sampler_state_bind;
nv20->pipe.delete_sampler_state = nv20_sampler_state_delete;
- nv20->pipe.set_sampler_textures = nv20_set_sampler_texture;
+ nv20->pipe.set_fragment_sampler_textures = nv20_set_sampler_texture;
nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create;
nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h
index 34f402fdcbf..dde41065685 100644
--- a/src/gallium/drivers/nv20/nv20_state.h
+++ b/src/gallium/drivers/nv20/nv20_state.h
@@ -126,6 +126,7 @@ struct nv20_depth_stencil_alpha_state {
struct nv20_miptree {
struct pipe_texture base;
+ struct nouveau_bo *bo;
struct pipe_buffer *buffer;
uint total_size;
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 4042f46d053..6bbd1fdae9d 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -5,27 +5,34 @@
static void nv20_state_emit_blend(struct nv20_context* nv20)
{
struct nv20_blend_state *b = nv20->blend;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
- OUT_RING (b->d_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+ OUT_RING (chan, b->d_enable);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
- OUT_RING (b->b_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+ OUT_RING (chan, b->b_enable);
- BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
- OUT_RING (b->b_srcfunc);
- OUT_RING (b->b_dstfunc);
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+ OUT_RING (chan, b->b_srcfunc);
+ OUT_RING (chan, b->b_dstfunc);
- BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
- OUT_RING (b->c_mask);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+ OUT_RING (chan, b->c_mask);
}
static void nv20_state_emit_blend_color(struct nv20_context* nv20)
{
struct pipe_blend_color *c = nv20->blend_color;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
- OUT_RING ((float_to_ubyte(c->color[3]) << 24)|
+ BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1);
+ OUT_RING (chan,
+ (float_to_ubyte(c->color[3]) << 24)|
(float_to_ubyte(c->color[0]) << 16)|
(float_to_ubyte(c->color[1]) << 8) |
(float_to_ubyte(c->color[2]) << 0));
@@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20)
static void nv20_state_emit_rast(struct nv20_context* nv20)
{
struct nv20_rasterizer_state *r = nv20->rast;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
- OUT_RING (r->shade_model);
- OUT_RING (r->line_width);
+ BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2);
+ OUT_RING (chan, r->shade_model);
+ OUT_RING (chan, r->line_width);
- BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
- OUT_RING (r->point_size);
+ BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
+ OUT_RING (chan, r->point_size);
- BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
- OUT_RING (r->poly_mode_front);
- OUT_RING (r->poly_mode_back);
+ BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+ OUT_RING (chan, r->poly_mode_front);
+ OUT_RING (chan, r->poly_mode_back);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
- OUT_RING (r->cull_face);
- OUT_RING (r->front_face);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+ OUT_RING (chan, r->cull_face);
+ OUT_RING (chan, r->front_face);
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
- OUT_RING (r->line_smooth_en);
- OUT_RING (r->poly_smooth_en);
+ BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+ OUT_RING (chan, r->line_smooth_en);
+ OUT_RING (chan, r->poly_smooth_en);
- BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
- OUT_RING (r->cull_face_en);
+ BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+ OUT_RING (chan, r->cull_face_en);
}
static void nv20_state_emit_dsa(struct nv20_context* nv20)
{
struct nv20_depth_stencil_alpha_state *d = nv20->dsa;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
- BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
- OUT_RING (d->depth.func);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+ OUT_RING (chan, d->depth.func);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
- OUT_RING (d->depth.write_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+ OUT_RING (chan, d->depth.write_enable);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
- OUT_RING (d->depth.test_enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+ OUT_RING (chan, d->depth.test_enable);
- BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+ OUT_RING (chan, 1);
#if 0
- BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
- OUT_RING (d->stencil.enable);
- BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
- OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+ OUT_RING (chan, d->stencil.enable);
+ BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+ OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
#endif
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
- OUT_RING (d->alpha.enabled);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+ OUT_RING (chan, d->alpha.enabled);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
- OUT_RING (d->alpha.func);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+ OUT_RING (chan, d->alpha.func);
- BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
- OUT_RING (d->alpha.ref);
+ BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+ OUT_RING (chan, d->alpha.ref);
}
static void nv20_state_emit_viewport(struct nv20_context* nv20)
@@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20)
{
/* NV20TCL_SCISSOR_* is probably a software method */
/* struct pipe_scissor_state *s = nv20->scissor;
- BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
- OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
- OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
+
+ BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/
}
static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
@@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
uint32_t rt_format, w, h;
int colour_format = 0, zeta_format = 0;
struct nv20_miptree *nv20mt = 0;
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
w = fb->cbufs[0]->width;
h = fb->cbufs[0]->height;
@@ -135,6 +155,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20;
switch (colour_format) {
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ rt_format |= NV20TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
case 0:
rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8;
@@ -147,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
}
if (zeta) {
- BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (zeta->pitch << 16));
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (zeta->pitch << 16));
} else {
- BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
- OUT_RING (rt->pitch | (rt->pitch << 16));
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+ OUT_RING (chan, rt->pitch | (rt->pitch << 16));
}
nv20mt = (struct nv20_miptree *)rt->base.texture;
@@ -163,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
nv20->zeta = nv20mt->buffer;
}
- BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
- OUT_RING ((w << 16) | 0);
- OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */
- OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */
- BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
- OUT_RING (((w - 1) << 16) | 0);
- OUT_RING (((h - 1) << 16) | 0);
+ BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3);
+ OUT_RING (chan, (w << 16) | 0);
+ OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */
+ OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */
+ BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+ OUT_RING (chan, ((w - 1) << 16) | 0);
+ OUT_RING (chan, ((h - 1) << 16) | 0);
}
static void nv20_vertex_layout(struct nv20_context *nv20)
@@ -225,7 +248,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
}
/* always do position */ {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo->hwfmt[0] |= (1 << 0);
}
@@ -234,19 +257,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 4; i < 6; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 3));
}
if (colors[0]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 3);
}
if (colors[1]) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1);
draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
vinfo->hwfmt[0] |= (1 << 4);
}
@@ -255,7 +278,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 6; i < 10; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i - 1));
}
@@ -264,7 +287,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 0; i < 4; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 9));
}
@@ -273,13 +296,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
for (i = 10; i < 12; i++) {
if (!generics[i])
continue;
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << (i + 3));
}
if (fog) {
- src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+ src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0);
draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
vinfo->hwfmt[0] |= (1 << 15);
}
@@ -290,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
void
nv20_emit_hw_state(struct nv20_context *nv20)
{
+ struct nv20_screen *screen = nv20->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *kelvin = screen->kelvin;
+ struct nouveau_bo *rt_bo;
int i;
if (nv20->dirty & NV20_NEW_VERTPROG) {
@@ -358,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20)
*/
/* Render target */
- BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1);
- OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ rt_bo = nouveau_bo(nv20->rt[0]);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
if (nv20->zeta) {
- BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
- OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
- OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1);
+ OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1);
+ OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* XXX for when we allocate LMA on nv17 */
-/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
- OUT_RELOCl(nv20->zeta + lma_offset);*/
+/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+ OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/
}
/* Vertex buffer */
- BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
- OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
- BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
- OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1);
+ OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+ OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Texture images */
for (i = 0; i < 2; i++) {
if (!(nv20->fp_samplers & (1 << i)))
continue;
- BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
- OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+ struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer);
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1);
+ OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
- OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+ BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1);
+ OUT_RELOCd(chan, bo, nv20->tex[i].format,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
NV20TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 81b4f1a9177..699773e8e6f 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -1,7 +1,9 @@
#include <pipe/p_state.h>
#include <pipe/p_defines.h>
#include <pipe/p_inlines.h>
+#include <util/u_format.h>
#include <util/u_memory.h>
+#include <util/u_math.h>
#include <nouveau/nouveau_winsys.h>
#include "nv20_context.h"
#include "nv20_screen.h"
@@ -10,22 +12,19 @@
struct nv20_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
- bool direct;
+ boolean direct;
};
static void
-nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width[0] = pt->width[level];
- template->height[0] = pt->height[level];
- template->depth[0] = 1;
- template->block = pt->block;
- template->nblocksx[0] = pt->nblocksx[level];
- template->nblocksy[0] = pt->nblocksx[level];
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -48,14 +47,10 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
@@ -76,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv20_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -85,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
face, level, zslice,
pipe_transfer_buffer_flags(&tx->base));
@@ -110,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -131,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -156,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * ptx->block.size;
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
index 84d7db6c5e2..52991a0d856 100644
--- a/src/gallium/drivers/nv20/nv20_vbo.c
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -9,7 +9,7 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
-boolean nv20_draw_elements( struct pipe_context *pipe,
+void nv20_draw_elements( struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned prim, unsigned start, unsigned count)
@@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
draw_set_mapped_element_buffer(draw, 0, NULL);
}
- draw_set_mapped_constant_buffer(draw,
+ draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
nv20->constbuf[PIPE_SHADER_VERTEX],
nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
@@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
}
draw_flush(nv20->draw);
- return TRUE;
}
-boolean nv20_draw_arrays( struct pipe_context *pipe,
+void nv20_draw_arrays( struct pipe_context *pipe,
unsigned prim, unsigned start, unsigned count)
{
- return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+ nv20_draw_elements(pipe, NULL, 0, prim, start, count);
}
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index 388245ecb04..7886c2af7e6 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -253,32 +253,32 @@ static INLINE struct nv20_sreg
tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
struct nv20_sreg src;
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index);
break;
case TGSI_FILE_CONSTANT:
- src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
break;
case TGSI_FILE_IMMEDIATE:
- src = vpc->imm[fsrc->SrcRegister.Index];
+ src = vpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- if (vpc->high_temp < fsrc->SrcRegister.Index)
- vpc->high_temp = fsrc->SrcRegister.Index;
- src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ if (vpc->high_temp < fsrc->Register.Index)
+ vpc->high_temp = fsrc->Register.Index;
+ src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index);
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}
- src.abs = fsrc->SrcRegisterExtMod.Absolute;
- src.negate = fsrc->SrcRegister.Negate;
- src.swz[0] = fsrc->SrcRegister.SwizzleX;
- src.swz[1] = fsrc->SrcRegister.SwizzleY;
- src.swz[2] = fsrc->SrcRegister.SwizzleZ;
- src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
return src;
}
@@ -286,14 +286,14 @@ static INLINE struct nv20_sreg
tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nv20_sreg dst;
- switch (fdst->DstRegister.File) {
+ switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
dst = nv20_sr(NV30SR_OUTPUT,
- vpc->output_map[fdst->DstRegister.Index]);
+ vpc->output_map[fdst->Register.Index]);
break;
case TGSI_FILE_TEMPORARY:
- dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index);
if (vpc->high_temp < dst.index)
vpc->high_temp = dst.index;
break;
@@ -334,8 +334,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(vpc, fsrc);
}
}
@@ -343,11 +343,11 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- switch (fsrc->SrcRegister.File) {
+ fsrc = &finst->Src[i];
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- if (ai == -1 || ai == fsrc->SrcRegister.Index) {
- ai = fsrc->SrcRegister.Index;
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -360,8 +360,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
*/
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
- if (ci == -1 || ci == fsrc->SrcRegister.Index) {
- ci = fsrc->SrcRegister.Index;
+ if (ci == -1 || ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -378,8 +378,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
}
}
- dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
- mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ dst = tgsi_dst(vpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
@@ -490,15 +490,15 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
{
int hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV30_VP_INST_DEST_POS;
break;
case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV30_VP_INST_DEST_COL0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV30_VP_INST_DEST_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
@@ -506,10 +506,10 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
}
break;
case TGSI_SEMANTIC_BCOLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV30_VP_INST_DEST_BFC0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV30_VP_INST_DEST_BFC1;
} else {
NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -523,19 +523,22 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
hw = NV30_VP_INST_DEST_PSZ;
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.SemanticIndex <= 7) {
- hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ if (fdec->Semantic.Index <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
}
- vpc->output_map[fdec->DeclarationRange.First] = hw;
+ vpc->output_map[fdec->Range.First] = hw;
return TRUE;
}
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index f827bdc78b1..54572e9ab3a 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -10,50 +10,38 @@ nv30_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv30_context *nv30 = nv30_context(pipe);
-
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
+
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- BEGIN_RING(rankine, 0x1fd8, 1);
- OUT_RING (2);
- BEGIN_RING(rankine, 0x1fd8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, rankine, 0x1fd8, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, rankine, 0x1fd8, 1);
+ OUT_RING (chan, 1);
}
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
nv30_destroy(struct pipe_context *pipe)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (nv30->state.hw[i])
+ so_ref(NULL, &nv30->state.hw[i]);
+ }
if (nv30->draw)
draw_destroy(nv30->draw);
FREE(nv30);
}
-static unsigned int
-nv30_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv30_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
struct pipe_context *
nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -78,8 +66,11 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv30->pipe.clear = nv30_clear;
nv30->pipe.flush = nv30_flush;
- nv30->pipe.is_texture_referenced = nv30_is_texture_referenced;
- nv30->pipe.is_buffer_referenced = nv30_is_buffer_referenced;
+ nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
+
+ screen->base.channel->user_private = nv30;
+ screen->base.channel->flush_notify = nv30_state_flush_notify;
nv30_init_query_functions(nv30);
nv30_init_surface_functions(nv30);
@@ -95,4 +86,3 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
return &nv30->pipe;
}
-
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 4229c0a0e14..e59449287b5 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -13,10 +13,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv30_screen *ctx = nv30->screen
-#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_context.h"
#include "nouveau/nouveau_stateobj.h"
#include "nv30_state.h"
@@ -143,7 +140,6 @@ struct nv30_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- const unsigned *edgeflags;
};
static INLINE struct nv30_context *
@@ -183,6 +179,7 @@ extern void nv30_fragtex_bind(struct nv30_context *);
/* nv30_state.c and friends */
extern boolean nv30_state_validate(struct nv30_context *nv30);
extern void nv30_state_emit(struct nv30_context *nv30);
+extern void nv30_state_flush_notify(struct nouveau_channel *chan);
extern struct nv30_state_entry nv30_state_rasterizer;
extern struct nv30_state_entry nv30_state_scissor;
extern struct nv30_state_entry nv30_state_stipple;
@@ -197,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex;
extern struct nv30_state_entry nv30_state_vbo;
/* nv30_vbo.c */
-extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv30_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv30_draw_elements(struct pipe_context *pipe,
+extern void nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index a48ba9782b3..2d565cb631b 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -4,6 +4,7 @@
#include "pipe/p_inlines.h"
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
@@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
sizeof(uint32_t) * 4);
}
- sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
+ sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
break;
case NV30SR_NONE:
sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
@@ -236,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nv30_sreg src;
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
src = nv30_sr(NV30SR_INPUT,
- fpc->attrib_map[fsrc->SrcRegister.Index]);
+ fpc->attrib_map[fsrc->Register.Index]);
break;
case TGSI_FILE_CONSTANT:
- src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ src = constant(fpc, fsrc->Register.Index, NULL);
break;
case TGSI_FILE_IMMEDIATE:
- assert(fsrc->SrcRegister.Index < fpc->nr_imm);
- src = fpc->imm[fsrc->SrcRegister.Index];
+ assert(fsrc->Register.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+ src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
if (fpc->high_temp < src.index)
fpc->high_temp = src.index;
break;
@@ -257,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
* Luckily fragprog results are just temp regs..
*/
case TGSI_FILE_OUTPUT:
- if (fsrc->SrcRegister.Index == fpc->colour_id)
+ if (fsrc->Register.Index == fpc->colour_id)
return nv30_sr(NV30SR_OUTPUT, 0);
else
return nv30_sr(NV30SR_OUTPUT, 1);
@@ -267,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
break;
}
- src.abs = fsrc->SrcRegisterExtMod.Absolute;
- src.negate = fsrc->SrcRegister.Negate;
- src.swz[0] = fsrc->SrcRegister.SwizzleX;
- src.swz[1] = fsrc->SrcRegister.SwizzleY;
- src.swz[2] = fsrc->SrcRegister.SwizzleZ;
- src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
return src;
}
@@ -280,22 +281,22 @@ static INLINE struct nv30_sreg
tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
int idx;
- switch (fdst->DstRegister.File) {
+ switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
- if (fdst->DstRegister.Index == fpc->colour_id)
+ if (fdst->Register.Index == fpc->colour_id)
return nv30_sr(NV30SR_OUTPUT, 0);
else
return nv30_sr(NV30SR_OUTPUT, 1);
break;
case TGSI_FILE_TEMPORARY:
- idx = fdst->DstRegister.Index + 1;
+ idx = fdst->Register.Index + 1;
if (fpc->high_temp < idx)
fpc->high_temp = idx;
return nv30_sr(NV30SR_TEMP, idx);
case TGSI_FILE_NULL:
return nv30_sr(NV30SR_NONE, 0);
default:
- NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
return nv30_sr(NV30SR_NONE, 0);
}
}
@@ -318,38 +319,23 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
{
const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
- uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
- uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
- fsrc->SrcRegisterExtSwz.NegateY,
- fsrc->SrcRegisterExtSwz.NegateZ,
- fsrc->SrcRegisterExtSwz.NegateW };
+ uint mask = 0;
uint c;
for (c = 0; c < 4; c++) {
- switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
mask |= (1 << c);
break;
- case TGSI_EXTSWIZZLE_ZERO:
- zero_mask |= (1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
- case TGSI_EXTSWIZZLE_ONE:
- one_mask |= (1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
default:
assert(0);
}
-
- if (!tgsi.negate && neg[c])
- neg_mask |= (1 << c);
}
- if (mask == MASK_ALL && !neg_mask)
+ if (mask == MASK_ALL)
return TRUE;
*src = temp(fpc);
@@ -357,18 +343,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
if (mask)
arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
- if (zero_mask)
- arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
-
- if (one_mask)
- arith(fpc, 0, STR, *src, one_mask, *src, none, none);
-
- if (neg_mask) {
- struct nv30_sreg one = temp(fpc);
- arith(fpc, 0, STR, one, neg_mask, one, none, none);
- arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
- }
-
return FALSE;
}
@@ -389,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(fpc, fsrc);
}
}
@@ -398,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
+ fsrc = &finst->Src[i];
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
@@ -411,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
break;
}
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- if (ai == -1 || ai == fsrc->SrcRegister.Index) {
- ai = fsrc->SrcRegister.Index;
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
NOUVEAU_MSG("extra src attr %d\n",
- fsrc->SrcRegister.Index);
+ fsrc->Register.Index);
src[i] = temp(fpc);
arith(fpc, 0, MOV, src[i], MASK_ALL,
tgsi_src(fpc, fsrc), none, none);
@@ -426,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
break;
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
- if (ci == -1 || ci == fsrc->SrcRegister.Index) {
- ci = fsrc->SrcRegister.Index;
+ if (ci == -1 || ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
@@ -439,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
/* handled above */
break;
case TGSI_FILE_SAMPLER:
- unit = fsrc->SrcRegister.Index;
+ unit = fsrc->Register.Index;
break;
case TGSI_FILE_OUTPUT:
break;
@@ -449,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
}
}
- dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
- mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ dst = tgsi_dst(fpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
switch (finst->Instruction.Opcode) {
@@ -461,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv30_sr(NV30SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV30_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV30_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -527,12 +502,6 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
case TGSI_OPCODE_MUL:
arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
break;
- case TGSI_OPCODE_NOISE1:
- case TGSI_OPCODE_NOISE2:
- case TGSI_OPCODE_NOISE3:
- case TGSI_OPCODE_NOISE4:
- arith(fpc, sat, SFL, dst, mask, none, none, none);
- break;
case TGSI_OPCODE_POW:
arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
break;
@@ -549,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SIN:
@@ -604,15 +588,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
{
int hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV30_FP_OP_INPUT_SRC_POSITION;
break;
case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV30_FP_OP_INPUT_SRC_COL0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV30_FP_OP_INPUT_SRC_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
@@ -623,9 +607,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
hw = NV30_FP_OP_INPUT_SRC_FOGC;
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.SemanticIndex <= 7) {
+ if (fdec->Semantic.Index <= 7) {
hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
- SemanticIndex);
+ Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
return FALSE;
@@ -636,7 +620,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
return FALSE;
}
- fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ fpc->attrib_map[fdec->Range.First] = hw;
return TRUE;
}
@@ -644,12 +628,12 @@ static boolean
nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
- fpc->depth_id = fdec->DeclarationRange.First;
+ fpc->depth_id = fdec->Range.First;
break;
case TGSI_SEMANTIC_COLOR:
- fpc->colour_id = fdec->DeclarationRange.First;
+ fpc->colour_id = fdec->Range.First;
break;
default:
NOUVEAU_ERR("bad output semantic\n");
@@ -685,9 +669,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
goto out_err;
break;
/*case TGSI_FILE_TEMPORARY:
- if (fdec->DeclarationRange.Last > high_temp) {
+ if (fdec->Range.Last > high_temp) {
high_temp =
- fdec->DeclarationRange.Last;
+ fdec->Range.Last;
}
break;*/
default:
@@ -699,7 +683,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
{
struct tgsi_full_immediate *imm;
float vals[4];
-
+
imm = &p.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
@@ -787,7 +771,7 @@ nv30_fragprog_translate(struct nv30_context *nv30,
fp->insn[fpc->inst_offset + 1] = 0x00000000;
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;
-
+
fp->translated = TRUE;
fp->on_hw = FALSE;
out_err:
@@ -853,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv30_fragprog_upload(nv30, fp);
- so = so_new(8, 1);
+ so = so_new(4, 4, 1);
so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
@@ -871,7 +855,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
update_constants:
if (fp->nr_consts) {
float *map;
-
+
map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
@@ -902,6 +886,12 @@ void
nv30_fragprog_destroy(struct nv30_context *nv30,
struct nv30_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index 822e1d8defe..98935678911 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -21,6 +21,7 @@ struct nv30_texture_format {
static struct nv30_texture_format
nv30_texture_formats[] = {
+ _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W),
_(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W),
_(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W),
_(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W),
@@ -29,8 +30,8 @@ nv30_texture_formats[] = {
_(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X),
_(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X),
_(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y),
-// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X),
-// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X),
+ _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X),
+ _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X),
_(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W),
_(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W),
_(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W),
@@ -69,13 +70,13 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
tf = nv30_fragtex_format(pt->format);
if (!tf)
- assert(0);
+ return NULL;
txf = tf->format;
txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
- txf |= log2i(pt->width[0]) << 20;
- txf |= log2i(pt->height[0]) << 24;
- txf |= log2i(pt->depth[0]) << 28;
+ txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+ txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+ txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
switch (pt->target) {
@@ -105,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
txs = tf->swizzle;
- so = so_new(16, 2);
+ so = so_new(1, 8, 2);
so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -114,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
so_data (so, txs);
so_data (so, ps->filt | 0x2000 /*voodoo*/);
- so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
- pt->height[0]);
+ so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+ pt->height0);
so_data (so, ps->bcol);
return so;
@@ -134,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
so_data (so, 0);
so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 7f8054de733..8fbba38e78f 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -1,14 +1,17 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
#include "nv30_context.h"
+#include "../nv04/nv04_surface_2d.h"
static void
nv30_miptree_layout(struct nv30_miptree *nv30mt)
{
struct pipe_texture *pt = &nv30mt->base;
- uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint width = pt->width0;
uint offset = 0;
int nr_faces, l, f;
uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +24,21 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
nr_faces = 6;
} else
if (pt->target == PIPE_TEXTURE_3D) {
- nr_faces = pt->depth[0];
+ nr_faces = pt->depth0;
} else {
nr_faces = 1;
}
for (l = 0; l <= pt->last_level; l++) {
- pt->width[l] = width;
- pt->height[l] = height;
- pt->depth[l] = depth;
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
- nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+ nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
else
- nv30mt->level[l].pitch = pt->width[l] * pt->block.size;
+ nv30mt->level[l].pitch = util_format_get_stride(pt->format, width);
nv30mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
- depth = MAX2(1, depth >> 1);
+ width = u_minify(width, 1);
}
for (f = 0; f < nr_faces; f++) {
@@ -51,14 +46,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
nv30mt->level[l].image_offset[f] = offset;
if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
- pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
- offset += align(nv30mt->level[l].pitch * pt->height[l], 64);
+ u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+ offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64);
else
- offset += nv30mt->level[l].pitch * pt->height[l];
+ offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
}
nv30mt->level[l].image_offset[f] = offset;
- offset += nv30mt->level[l].pitch * pt->height[l];
+ offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
}
nv30mt->total_size = offset;
@@ -79,8 +74,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
mt->base.screen = pscreen;
/* Swizzled textures must be POT */
- if (pt->width[0] & (pt->width[0] - 1) ||
- pt->height[0] & (pt->height[0] - 1))
+ if (pt->width0 & (pt->width0 - 1) ||
+ pt->height0 & (pt->height0 - 1))
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
else
if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -96,6 +91,11 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_R16_SNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
{
if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE))
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
@@ -109,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv30_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
@@ -117,6 +123,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
FREE(mt);
return NULL;
}
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -129,7 +136,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv30_miptree);
@@ -142,7 +149,11 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].pitch = stride[0];
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+ /* Assume whoever created this buffer expects it to be linear for now */
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
pipe_buffer_reference(&mt->buffer, pb);
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -174,8 +185,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&ns->base.texture, pt);
ns->base.format = pt->format;
- ns->base.width = pt->width[level];
- ns->base.height = pt->height[level];
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
ns->base.usage = flags;
pipe_reference_init(&ns->base.reference, 1);
ns->base.face = face;
@@ -192,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = nv30mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv30_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv30_miptree_surface_del(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
index 1d1c8a484e1..e27e9ccbf60 100644
--- a/src/gallium/drivers/nv30/nv30_query.c
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_query *q = nv30_query(pq);
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
@@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
assert(0);
nouveau_notifier_reset(nv30->screen->query, q->object->start);
- BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
- OUT_RING (1);
- BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1);
+ OUT_RING (chan, 1);
q->ready = FALSE;
}
@@ -69,12 +72,15 @@ static void
nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
struct nv30_query *q = nv30_query(pq);
- BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
- OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+ BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1);
+ OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
- FIRE_RING(NULL);
+ FIRE_RING(chan);
}
static boolean
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 41af38450b5..9ed48178dc2 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -10,6 +10,22 @@
#define NV34TCL_CHIPSET_3X_MASK 0x00000010
#define NV35TCL_CHIPSET_3X_MASK 0x000001e0
+/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
+ * to get the pointer to the context front buffer, so I copied nouveau_winsys here.
+ * nv30_screen_surface_format_supported() can then use it to enforce creating fbo
+ * with same number of bits everywhere.
+ */
+struct nouveau_winsys {
+ struct pipe_winsys base;
+
+ struct pipe_screen *pscreen;
+
+ unsigned nr_pctx;
+ struct pipe_context **pctx;
+
+ struct pipe_surface *front;
+};
+
static int
nv30_screen_get_param(struct pipe_screen *pscreen, int param)
{
@@ -83,6 +99,8 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
enum pipe_texture_target target,
unsigned tex_usage, unsigned geom_flags)
{
+ struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
+
if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
switch (format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -96,7 +114,11 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
switch (format) {
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
+ return TRUE;
case PIPE_FORMAT_Z16_UNORM:
+ if (front) {
+ return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
+ }
return TRUE;
default:
break;
@@ -134,6 +156,12 @@ static void
nv30_screen_destroy(struct pipe_screen *pscreen)
{
struct nv30_screen *screen = nv30_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV30_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -141,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->rankine);
+ nv04_surface_2d_takedown(&screen->eng2d);
+
+ nouveau_screen_fini(&screen->base);
FREE(pscreen);
}
@@ -202,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->rankine, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -239,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static rankine initialisation */
- so = so_new(128, 0);
+ so = so_new(36, 60, 0);
so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index b91e972c123..a80dfb04880 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe,
struct nv30_context *nv30 = nv30_context(pipe);
struct nouveau_grobj *rankine = nv30->screen->rankine;
struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
- struct nouveau_stateobj *so = so_new(16, 0);
+ struct nouveau_stateobj *so = so_new(5, 8, 0);
if (cso->blend_enable) {
so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
@@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe,
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(9, 19, 0);
struct nouveau_grobj *rankine = nv30->screen->rankine;
/*XXX: ignored:
@@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(5, 21, 0);
struct nouveau_grobj *rankine = nv30->screen->rankine;
so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
@@ -672,16 +672,6 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
}
-static void
-nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
- struct nv30_context *nv30 = nv30_context(pipe);
-
- nv30->edgeflags = bitfield;
- nv30->dirty |= NV30_NEW_ARRAYS;
- /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
-}
-
void
nv30_init_state_functions(struct nv30_context *nv30)
{
@@ -690,9 +680,9 @@ nv30_init_state_functions(struct nv30_context *nv30)
nv30->pipe.delete_blend_state = nv30_blend_state_delete;
nv30->pipe.create_sampler_state = nv30_sampler_state_create;
- nv30->pipe.bind_sampler_states = nv30_sampler_state_bind;
+ nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind;
nv30->pipe.delete_sampler_state = nv30_sampler_state_delete;
- nv30->pipe.set_sampler_textures = nv30_set_sampler_texture;
+ nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture;
nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create;
nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind;
@@ -721,7 +711,6 @@ nv30_init_state_functions(struct nv30_context *nv30)
nv30->pipe.set_scissor_state = nv30_set_scissor_state;
nv30->pipe.set_viewport_state = nv30_set_viewport_state;
- nv30->pipe.set_edgeflags = nv30_set_edgeflags;
nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h
index e6f23bf1667..e42e872de75 100644
--- a/src/gallium/drivers/nv30/nv30_state.h
+++ b/src/gallium/drivers/nv30/nv30_state.h
@@ -72,6 +72,7 @@ struct nv30_fragment_program {
struct nv30_miptree {
struct pipe_texture base;
+ struct nouveau_bo *bo;
struct pipe_buffer *buffer;
uint total_size;
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
index 64cf9ae93a0..c36d58c040c 100644
--- a/src/gallium/drivers/nv30/nv30_state_blend.c
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = {
static boolean
nv30_state_blend_colour_validate(struct nv30_context *nv30)
{
- struct nouveau_stateobj *so = so_new(2, 0);
+ struct nouveau_stateobj *so = so_new(1, 1, 0);
struct pipe_blend_color *bcol = &nv30->blend_colour;
so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
index 621b8846c8e..ac52d946f02 100644
--- a/src/gallium/drivers/nv30/nv30_state_emit.c
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30)
struct nouveau_channel *chan = nv30->screen->base.channel;
struct nv30_state *state = &nv30->state;
struct nv30_screen *screen = nv30->screen;
- unsigned i, samplers;
+ unsigned i;
uint64_t states;
if (nv30->pctx_id != screen->cur_pctx) {
@@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30)
}
state->dirty = 0;
+}
+
+void
+nv30_state_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv30_context *nv30 = chan->user_private;
+ struct nv30_state *state = &nv30->state;
+ unsigned i, samplers;
so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 44b6a74715a..2ed2ea55e84 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -8,15 +8,15 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
struct nouveau_channel *chan = nv30->screen->base.channel;
struct nouveau_grobj *rankine = nv30->screen->rankine;
struct nv04_surface *rt[2], *zeta = NULL;
- uint32_t rt_enable, rt_format;
- int i, colour_format = 0, zeta_format = 0;
- struct nouveau_stateobj *so = so_new(64, 10);
+ uint32_t rt_enable = 0, rt_format = 0;
+ int i, colour_format = 0, zeta_format = 0, depth_only = 0;
+ struct nouveau_stateobj *so = so_new(12, 18, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;
struct nv30_miptree *nv30mt;
+ int colour_bits = 32, zeta_bits = 32;
- rt_enable = 0;
for (i = 0; i < fb->nr_cbufs; i++) {
if (colour_format) {
assert(colour_format == fb->cbufs[i]->format);
@@ -35,26 +35,47 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
zeta = (struct nv04_surface *)fb->zsbuf;
}
- if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
- assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
- for (i = 1; i < fb->nr_cbufs; i++)
- assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+ if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) {
+ /* Render to at least a colour buffer */
+ if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+ for (i = 1; i < fb->nr_cbufs; i++)
+ assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
- /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */
- rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
- log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ |
- log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/;
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+ } else if (fb->zsbuf) {
+ depth_only = 1;
+
+ /* Render to depth buffer only */
+ if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+ assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+ rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+ (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+ (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+ }
+ else
+ rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+ } else {
+ return FALSE;
}
- else
- rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
switch (colour_format) {
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
case 0:
rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
break;
case PIPE_FORMAT_R5G6B5_UNORM:
rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+ colour_bits = 16;
break;
default:
assert(0);
@@ -63,6 +84,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
switch (zeta_format) {
case PIPE_FORMAT_Z16_UNORM:
rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+ zeta_bits = 16;
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
@@ -73,21 +95,27 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
assert(0);
}
- if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
- uint32_t pitch = rt[0]->pitch;
+ if (colour_bits > zeta_bits) {
+ return FALSE;
+ }
+
+ if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) {
+ struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]);
+ uint32_t pitch = rt0->pitch;
+
if (zeta) {
pitch |= (zeta->pitch << 16);
} else {
pitch |= (pitch << 16);
}
- nv30mt = (struct nv30_miptree *)rt[0]->base.texture;
+ nv30mt = (struct nv30_miptree *) rt0->base.texture;
so_method(so, rankine, NV34TCL_DMA_COLOR0, 1);
so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR,
chan->vram->handle, chan->gart->handle);
so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2);
so_data (so, pitch);
- so_reloc (so, nouveau_bo(nv30mt->buffer), rt[0]->base.offset,
+ so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset,
rt_flags | NOUVEAU_BO_LOW, 0, 0);
}
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
index 3ac7a8471ea..ba61a9e24a4 100644
--- a/src/gallium/drivers/nv30/nv30_state_scissor.c
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30)
return FALSE;
nv30->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
+ so = so_new(1, 2, 0);
so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
if (nv30->state.scissor_enabled) {
so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
index d0c791ac082..ed520a4f439 100644
--- a/src/gallium/drivers/nv30/nv30_state_stipple.c
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30)
if (rast->poly_stipple_enable) {
unsigned i;
- so = so_new(35, 0);
+ so = so_new(2, 33, 0);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 1);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, nv30->stipple[i]);
} else {
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
index c3eb413dac6..2d7781292bd 100644
--- a/src/gallium/drivers/nv30/nv30_state_viewport.c
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30)
return FALSE;
nv30->state.viewport_bypass = bypass;
- so = so_new(11, 0);
+ so = so_new(3, 10, 0);
if (!bypass) {
so_method(so, nv30->screen->rankine,
NV34TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 98011decf7c..65598991c68 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -1,7 +1,9 @@
#include <pipe/p_state.h>
#include <pipe/p_defines.h>
#include <pipe/p_inlines.h>
+#include <util/u_format.h>
#include <util/u_memory.h>
+#include <util/u_math.h>
#include <nouveau/nouveau_winsys.h>
#include "nv30_context.h"
#include "nv30_screen.h"
@@ -10,22 +12,19 @@
struct nv30_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
- bool direct;
+ boolean direct;
};
static void
-nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width[0] = pt->width[level];
- template->height[0] = pt->height[level];
- template->depth[0] = 1;
- template->block = pt->block;
- template->nblocksx[0] = pt->nblocksx[level];
- template->nblocksy[0] = pt->nblocksx[level];
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -48,14 +47,10 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
@@ -76,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv30_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -85,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -110,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -131,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -156,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * ptx->block.size;
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index 189656ec817..1c5db03ea24 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv30_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
unsigned restart = 0;
nv30_vbo_set_idxbuf(nv30, NULL, 0);
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
while (count) {
@@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
@@ -228,7 +230,9 @@ static INLINE void
nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -277,7 +281,9 @@ static INLINE void
nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -326,7 +332,9 @@ static INLINE void
nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
while (count) {
uint32_t *elts = (uint32_t *)ib + start;
@@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
while (vc) {
push = MIN2(vc, 2047);
- BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
- OUT_RINGp (elts, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (chan, elts, push);
vc -= push;
elts += push;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
}
-static boolean
+static void
nv30_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv30_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv30_context *nv30 = nv30_context(pipe);
- struct nouveau_channel *chan = nv30->screen->base.channel;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
unsigned restart = 0;
while (count) {
@@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+ BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv30_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe,
if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
mode, start, count);*/
- return FALSE;
+ return;
}
if (idxbuf) {
@@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -485,14 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- if (nv30->edgeflags) {
- /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
- return FALSE;
- }
-
- vtxbuf = so_new(20, 18);
+ vtxbuf = so_new(3, 17, 18);
so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
- vtxfmt = so_new(17, 0);
+ vtxfmt = so_new(1, 16, 0);
so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
@@ -505,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30)
if (!vb->stride) {
if (!sattr)
- sattr = so_new(16 * 5, 0);
+ sattr = so_new(16, 16 * 4, 0);
if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 14a5c0260d0..e77a5be3f23 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -253,32 +253,32 @@ static INLINE struct nv30_sreg
tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
struct nv30_sreg src;
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+ src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index);
break;
case TGSI_FILE_CONSTANT:
- src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
break;
case TGSI_FILE_IMMEDIATE:
- src = vpc->imm[fsrc->SrcRegister.Index];
+ src = vpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- if (vpc->high_temp < fsrc->SrcRegister.Index)
- vpc->high_temp = fsrc->SrcRegister.Index;
- src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+ if (vpc->high_temp < fsrc->Register.Index)
+ vpc->high_temp = fsrc->Register.Index;
+ src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index);
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}
- src.abs = fsrc->SrcRegisterExtMod.Absolute;
- src.negate = fsrc->SrcRegister.Negate;
- src.swz[0] = fsrc->SrcRegister.SwizzleX;
- src.swz[1] = fsrc->SrcRegister.SwizzleY;
- src.swz[2] = fsrc->SrcRegister.SwizzleZ;
- src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
return src;
}
@@ -286,14 +286,14 @@ static INLINE struct nv30_sreg
tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nv30_sreg dst;
- switch (fdst->DstRegister.File) {
+ switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
dst = nv30_sr(NV30SR_OUTPUT,
- vpc->output_map[fdst->DstRegister.Index]);
+ vpc->output_map[fdst->Register.Index]);
break;
case TGSI_FILE_TEMPORARY:
- dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+ dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index);
if (vpc->high_temp < dst.index)
vpc->high_temp = dst.index;
break;
@@ -334,8 +334,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(vpc, fsrc);
}
}
@@ -343,11 +343,11 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- switch (fsrc->SrcRegister.File) {
+ fsrc = &finst->Src[i];
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- if (ai == -1 || ai == fsrc->SrcRegister.Index) {
- ai = fsrc->SrcRegister.Index;
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -360,8 +360,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
*/
case TGSI_FILE_CONSTANT:
case TGSI_FILE_IMMEDIATE:
- if (ci == -1 || ci == fsrc->SrcRegister.Index) {
- ci = fsrc->SrcRegister.Index;
+ if (ci == -1 || ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -378,8 +378,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
}
}
- dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
- mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ dst = tgsi_dst(vpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
@@ -490,15 +490,15 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
{
int hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV30_VP_INST_DEST_POS;
break;
case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV30_VP_INST_DEST_COL0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV30_VP_INST_DEST_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
@@ -506,10 +506,10 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
}
break;
case TGSI_SEMANTIC_BCOLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV30_VP_INST_DEST_BFC0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV30_VP_INST_DEST_BFC1;
} else {
NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -523,19 +523,22 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
hw = NV30_VP_INST_DEST_PSZ;
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.SemanticIndex <= 7) {
- hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ if (fdec->Semantic.Index <= 7) {
+ hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
}
- vpc->output_map[fdec->DeclarationRange.First] = hw;
+ vpc->output_map[fdec->Range.First] = hw;
return TRUE;
}
@@ -647,7 +650,9 @@ static boolean
nv30_vertprog_validate(struct nv30_context *nv30)
{
struct pipe_screen *pscreen = nv30->pipe.screen;
- struct nouveau_grobj *rankine = nv30->screen->rankine;
+ struct nv30_screen *screen = nv30->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *rankine = screen->rankine;
struct nv30_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -681,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30)
assert(0);
}
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
so_ref(so, &vp->so);
@@ -767,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30)
4 * sizeof(float));
}
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
- OUT_RING (i + vp->data->start);
- OUT_RINGp ((uint32_t *)vpd->value, 4);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (chan, i + vp->data->start);
+ OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
if (constbuf)
@@ -785,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30)
vp->insns[i].data[2], vp->insns[i].data[3]);
}
#endif
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
- OUT_RING (vp->exec->start);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
- OUT_RINGp (vp->insns[i].data, 4);
+ BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (chan, vp->insns[i].data, 4);
}
}
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 8eba6a43ef9..f79ae4db84e 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -10,50 +10,38 @@ nv40_flush(struct pipe_context *pipe, unsigned flags,
struct pipe_fence_handle **fence)
{
struct nv40_context *nv40 = nv40_context(pipe);
-
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
+
if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
- BEGIN_RING(curie, 0x1fd8, 1);
- OUT_RING (2);
- BEGIN_RING(curie, 0x1fd8, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, 0x1fd8, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, curie, 0x1fd8, 1);
+ OUT_RING (chan, 1);
}
- FIRE_RING(fence);
+ FIRE_RING(chan);
+ if (fence)
+ *fence = NULL;
}
static void
nv40_destroy(struct pipe_context *pipe)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (nv40->state.hw[i])
+ so_ref(NULL, &nv40->state.hw[i]);
+ }
if (nv40->draw)
draw_destroy(nv40->draw);
FREE(nv40);
}
-static unsigned int
-nv40_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv40_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
struct pipe_context *
nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -78,8 +66,11 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv40->pipe.clear = nv40_clear;
nv40->pipe.flush = nv40_flush;
- nv40->pipe.is_texture_referenced = nv40_is_texture_referenced;
- nv40->pipe.is_buffer_referenced = nv40_is_buffer_referenced;
+ nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
+
+ screen->base.channel->user_private = nv40;
+ screen->base.channel->flush_notify = nv40_state_flush_notify;
nv40_init_query_functions(nv40);
nv40_init_surface_functions(nv40);
@@ -95,4 +86,3 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
return &nv40->pipe;
}
-
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 97bc83292d4..e219bb537ac 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -13,10 +13,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx) \
- struct nv40_screen *ctx = nv40->screen
-#include "nouveau/nouveau_push.h"
+#include "nouveau/nouveau_context.h"
#include "nouveau/nouveau_stateobj.h"
#include "nv40_state.h"
@@ -158,7 +155,6 @@ struct nv40_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- const unsigned *edgeflags;
};
static INLINE struct nv40_context *
@@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
-extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+extern void nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf,
unsigned ib_size, unsigned mode,
unsigned start, unsigned count);
@@ -203,6 +199,7 @@ extern void nv40_fragtex_bind(struct nv40_context *);
extern boolean nv40_state_validate(struct nv40_context *nv40);
extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
extern void nv40_state_emit(struct nv40_context *nv40);
+extern void nv40_state_flush_notify(struct nouveau_channel *chan);
extern struct nv40_state_entry nv40_state_rasterizer;
extern struct nv40_state_entry nv40_state_scissor;
extern struct nv40_state_entry nv40_state_stipple;
@@ -218,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo;
extern struct nv40_state_entry nv40_state_vtxfmt;
/* nv40_vbo.c */
-extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv40_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv40_draw_elements(struct pipe_context *pipe,
+extern void nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index b2f19ecb699..d826f8c2f5f 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage)
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned i;
for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
@@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
case EMIT_OMIT:
break;
case EMIT_1F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
- OUT_RING (fui(v->data[idx][0]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1);
+ OUT_RING (chan, fui(v->data[idx][0]));
break;
case EMIT_2F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
break;
case EMIT_3F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
- OUT_RING (fui(v->data[idx][2]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
break;
case EMIT_4F:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
- OUT_RING (fui(v->data[idx][0]));
- OUT_RING (fui(v->data[idx][1]));
- OUT_RING (fui(v->data[idx][2]));
- OUT_RING (fui(v->data[idx][3]));
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (chan, fui(v->data[idx][0]));
+ OUT_RING (chan, fui(v->data[idx][1]));
+ OUT_RING (chan, fui(v->data[idx][2]));
+ OUT_RING (chan, fui(v->data[idx][3]));
break;
case EMIT_4UB:
- BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
- OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
+ BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
+ OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
float_to_ubyte(v->data[idx][1]),
float_to_ubyte(v->data[idx][2]),
float_to_ubyte(v->data[idx][3])));
@@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
{
struct nv40_render_stage *rs = nv40_render_stage(stage);
struct nv40_context *nv40 = rs->nv40;
- struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf;
+
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_pushbuf *pb = chan->pushbuf;
+ struct nouveau_grobj *curie = screen->curie;
unsigned i;
/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
@@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
NOUVEAU_ERR("AIII, missed flush\n");
assert(0);
}
- FIRE_RING(NULL);
+ FIRE_RING(chan);
nv40_state_emit(nv40);
}
/* Switch primitive modes if necessary */
if (rs->prim != mode) {
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (mode);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, mode);
rs->prim = mode;
}
@@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
* off the primitive now.
*/
if (pb->remaining < ((count * 20) + 6)) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
rs->prim = NV40TCL_BEGIN_END_STOP;
}
}
@@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
struct nv40_render_stage *rs = nv40_render_stage(draw);
struct nv40_context *nv40 = rs->nv40;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
if (rs->prim != NV40TCL_BEGIN_END_STOP) {
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (NV40TCL_BEGIN_END_STOP);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
rs->prim = NV40TCL_BEGIN_END_STOP;
}
}
@@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40)
return &render->stage;
}
-boolean
+void
nv40_draw_elements_swtnl(struct pipe_context *pipe,
struct pipe_buffer *idxbuf, unsigned idxbuf_size,
unsigned mode, unsigned start, unsigned count)
@@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
void *map;
if (!nv40_state_validate_swtnl(nv40))
- return FALSE;
+ return;
nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
nv40_state_emit(nv40);
@@ -261,7 +271,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
map = pipe_buffer_map(pscreen,
nv40->constbuf[PIPE_SHADER_VERTEX],
PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+ draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX,
+ map, nr);
}
draw_arrays(nv40->draw, mode, start, count);
@@ -277,15 +288,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
draw_flush(nv40->draw);
pipe->flush(pipe, 0, NULL);
-
- return TRUE;
}
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
unsigned semantic, unsigned index)
{
- unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index);
unsigned a = nv40->swtnl.nr_attribs++;
nv40->swtnl.hw[a] = hw;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 32d9ed1a7f8..1237066c398 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
sizeof(uint32_t) * 4);
}
- sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
+ sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
break;
case NV40SR_NONE:
sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
@@ -255,50 +255,50 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nv40_sreg src;
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
src = nv40_sr(NV40SR_INPUT,
- fpc->attrib_map[fsrc->SrcRegister.Index]);
+ fpc->attrib_map[fsrc->Register.Index]);
break;
case TGSI_FILE_CONSTANT:
- src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+ src = constant(fpc, fsrc->Register.Index, NULL);
break;
case TGSI_FILE_IMMEDIATE:
- assert(fsrc->SrcRegister.Index < fpc->nr_imm);
- src = fpc->imm[fsrc->SrcRegister.Index];
+ assert(fsrc->Register.Index < fpc->nr_imm);
+ src = fpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- src = fpc->r_temp[fsrc->SrcRegister.Index];
+ src = fpc->r_temp[fsrc->Register.Index];
break;
/* NV40 fragprog result regs are just temps, so this is simple */
case TGSI_FILE_OUTPUT:
- src = fpc->r_result[fsrc->SrcRegister.Index];
+ src = fpc->r_result[fsrc->Register.Index];
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}
- src.abs = fsrc->SrcRegisterExtMod.Absolute;
- src.negate = fsrc->SrcRegister.Negate;
- src.swz[0] = fsrc->SrcRegister.SwizzleX;
- src.swz[1] = fsrc->SrcRegister.SwizzleY;
- src.swz[2] = fsrc->SrcRegister.SwizzleZ;
- src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
return src;
}
static INLINE struct nv40_sreg
tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
- switch (fdst->DstRegister.File) {
+ switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
- return fpc->r_result[fdst->DstRegister.Index];
+ return fpc->r_result[fdst->Register.Index];
case TGSI_FILE_TEMPORARY:
- return fpc->r_temp[fdst->DstRegister.Index];
+ return fpc->r_temp[fdst->Register.Index];
case TGSI_FILE_NULL:
return nv40_sr(NV40SR_NONE, 0);
default:
- NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+ NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
return nv40_sr(NV40SR_NONE, 0);
}
}
@@ -321,38 +321,23 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
{
const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
- uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
- uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
- fsrc->SrcRegisterExtSwz.NegateY,
- fsrc->SrcRegisterExtSwz.NegateZ,
- fsrc->SrcRegisterExtSwz.NegateW };
+ uint mask = 0;
uint c;
for (c = 0; c < 4; c++) {
- switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
mask |= (1 << c);
break;
- case TGSI_EXTSWIZZLE_ZERO:
- zero_mask |= (1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
- case TGSI_EXTSWIZZLE_ONE:
- one_mask |= (1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
default:
assert(0);
}
-
- if (!tgsi.negate && neg[c])
- neg_mask |= (1 << c);
}
- if (mask == MASK_ALL && !neg_mask)
+ if (mask == MASK_ALL)
return TRUE;
*src = temp(fpc);
@@ -360,18 +345,6 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
if (mask)
arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
- if (zero_mask)
- arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
-
- if (one_mask)
- arith(fpc, 0, STR, *src, one_mask, *src, none, none);
-
- if (neg_mask) {
- struct nv40_sreg one = temp(fpc);
- arith(fpc, 0, STR, one, neg_mask, one, none, none);
- arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
- }
-
return FALSE;
}
@@ -391,8 +364,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(fpc, fsrc);
}
}
@@ -400,9 +373,9 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
+ fsrc = &finst->Src[i];
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
@@ -413,10 +386,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
break;
}
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- if (ai == -1 || ai == fsrc->SrcRegister.Index) {
- ai = fsrc->SrcRegister.Index;
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
@@ -426,8 +399,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
break;
case TGSI_FILE_CONSTANT:
if ((ci == -1 && ii == -1) ||
- ci == fsrc->SrcRegister.Index) {
- ci = fsrc->SrcRegister.Index;
+ ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
@@ -437,8 +410,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
break;
case TGSI_FILE_IMMEDIATE:
if ((ci == -1 && ii == -1) ||
- ii == fsrc->SrcRegister.Index) {
- ii = fsrc->SrcRegister.Index;
+ ii == fsrc->Register.Index) {
+ ii = fsrc->Register.Index;
src[i] = tgsi_src(fpc, fsrc);
} else {
src[i] = temp(fpc);
@@ -450,7 +423,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
/* handled above */
break;
case TGSI_FILE_SAMPLER:
- unit = fsrc->SrcRegister.Index;
+ unit = fsrc->Register.Index;
break;
case TGSI_FILE_OUTPUT:
break;
@@ -460,8 +433,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
}
}
- dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
- mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ dst = tgsi_dst(fpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
switch (finst->Instruction.Opcode) {
@@ -472,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
break;
case TGSI_OPCODE_CMP:
- tmp = temp(fpc);
- arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+ tmp = nv40_sr(NV40SR_NONE, 0);
tmp.cc_update = 1;
arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+ dst.cc_test = NV40_VP_INST_COND_GE;
+ arith(fpc, sat, MOV, dst, mask, src[2], none, none);
dst.cc_test = NV40_VP_INST_COND_LT;
arith(fpc, sat, MOV, dst, mask, src[1], none, none);
break;
@@ -568,12 +542,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
case TGSI_OPCODE_MUL:
arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
break;
- case TGSI_OPCODE_NOISE1:
- case TGSI_OPCODE_NOISE2:
- case TGSI_OPCODE_NOISE3:
- case TGSI_OPCODE_NOISE4:
- arith(fpc, sat, SFL, dst, mask, none, none, none);
- break;
case TGSI_OPCODE_POW:
tmp = temp(fpc);
arith(fpc, 0, LG2, tmp, MASK_X,
@@ -606,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
neg(swz(tmp, X, X, X, X)), none, none);
break;
case TGSI_OPCODE_SCS:
- if (mask & MASK_X) {
- arith(fpc, sat, COS, dst, MASK_X,
- swz(src[0], X, X, X, X), none, none);
+ /* avoid overwriting the source */
+ if(src[0].swz[SWZ_X] != SWZ_X)
+ {
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
- if (mask & MASK_Y) {
- arith(fpc, sat, SIN, dst, MASK_Y,
- swz(src[0], X, X, X, X), none, none);
+ else
+ {
+ if (mask & MASK_Y) {
+ arith(fpc, sat, SIN, dst, MASK_Y,
+ swz(src[0], X, X, X, X), none, none);
+ }
+ if (mask & MASK_X) {
+ arith(fpc, sat, COS, dst, MASK_X,
+ swz(src[0], X, X, X, X), none, none);
+ }
}
break;
case TGSI_OPCODE_SEQ:
@@ -677,15 +660,15 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
{
int hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV40_FP_OP_INPUT_SRC_POSITION;
break;
case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV40_FP_OP_INPUT_SRC_COL0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV40_FP_OP_INPUT_SRC_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
@@ -696,9 +679,9 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
hw = NV40_FP_OP_INPUT_SRC_FOGC;
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.SemanticIndex <= 7) {
+ if (fdec->Semantic.Index <= 7) {
hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
- SemanticIndex);
+ Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
return FALSE;
@@ -709,7 +692,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
return FALSE;
}
- fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+ fpc->attrib_map[fdec->Range.First] = hw;
return TRUE;
}
@@ -717,15 +700,15 @@ static boolean
nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
- unsigned idx = fdec->DeclarationRange.First;
+ unsigned idx = fdec->Range.First;
unsigned hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = 1;
break;
case TGSI_SEMANTIC_COLOR:
- switch (fdec->Semantic.SemanticIndex) {
+ switch (fdec->Semantic.Index) {
case 0: hw = 0; break;
case 1: hw = 2; break;
case 2: hw = 3; break;
@@ -771,9 +754,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
goto out_err;
break;
case TGSI_FILE_TEMPORARY:
- if (fdec->DeclarationRange.Last > high_temp) {
+ if (fdec->Range.Last > high_temp) {
high_temp =
- fdec->DeclarationRange.Last;
+ fdec->Range.Last;
}
break;
default:
@@ -785,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
{
struct tgsi_full_immediate *imm;
float vals[4];
-
+
imm = &p.FullToken.FullImmediate;
assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
assert(fpc->nr_imm < MAX_IMM);
@@ -869,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40,
fp->insn[fpc->inst_offset + 1] = 0x00000000;
fp->insn[fpc->inst_offset + 2] = 0x00000000;
fp->insn[fpc->inst_offset + 3] = 0x00000000;
-
+
fp->translated = TRUE;
out_err:
tgsi_parse_free(&parse);
@@ -936,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
nv40_fragprog_upload(nv40, fp);
- so = so_new(4, 1);
+ so = so_new(2, 2, 1);
so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
@@ -950,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
update_constants:
if (fp->nr_consts) {
float *map;
-
+
map = pipe_buffer_map(pscreen, constbuf,
PIPE_BUFFER_USAGE_CPU_READ);
for (i = 0; i < fp->nr_consts; i++) {
@@ -981,6 +964,12 @@ void
nv40_fragprog_destroy(struct nv40_context *nv40,
struct nv40_fragment_program *fp)
{
+ if (fp->buffer)
+ pipe_buffer_reference(&fp->buffer, NULL);
+
+ if (fp->so)
+ so_ref(NULL, &fp->so);
+
if (fp->insn_len)
FREE(fp->insn);
}
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index f6cdf31dfee..aad9198210f 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -23,6 +23,7 @@ struct nv40_texture_format {
static struct nv40_texture_format
nv40_texture_formats[] = {
+ _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0),
_(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
_(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
_(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0),
@@ -107,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
txs = tf->swizzle;
- so = so_new(16, 2);
+ so = so_new(2, 9, 2);
so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -116,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
so_data (so, txs);
so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
- so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
- pt->height[0]);
+ so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) |
+ pt->height0);
so_data (so, ps->bcol);
so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
- so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+ so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
return so;
}
@@ -138,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40)
unit = ffs(samplers) - 1;
samplers &= ~(1 << unit);
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
so_data (so, 0);
so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 5a201ccf458..89bd155ff49 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -1,14 +1,19 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
#include "nv40_context.h"
+#include "../nv04/nv04_surface_2d.h"
+
+
static void
nv40_miptree_layout(struct nv40_miptree *mt)
{
struct pipe_texture *pt = &mt->base;
- uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+ uint width = pt->width0;
uint offset = 0;
int nr_faces, l, f;
uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +26,21 @@ nv40_miptree_layout(struct nv40_miptree *mt)
nr_faces = 6;
} else
if (pt->target == PIPE_TEXTURE_3D) {
- nr_faces = pt->depth[0];
+ nr_faces = pt->depth0;
} else {
nr_faces = 1;
}
for (l = 0; l <= pt->last_level; l++) {
- pt->width[l] = width;
- pt->height[l] = height;
- pt->depth[l] = depth;
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
- mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+ mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
else
- mt->level[l].pitch = pt->width[l] * pt->block.size;
+ mt->level[l].pitch = util_format_get_stride(pt->format, width);
mt->level[l].image_offset =
CALLOC(nr_faces, sizeof(unsigned));
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
- depth = MAX2(1, depth >> 1);
+ width = u_minify(width, 1);
}
for (f = 0; f < nr_faces; f++) {
@@ -51,14 +48,14 @@ nv40_miptree_layout(struct nv40_miptree *mt)
mt->level[l].image_offset[f] = offset;
if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
- pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
- offset += align(mt->level[l].pitch * pt->height[l], 64);
+ u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+ offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
else
- offset += mt->level[l].pitch * pt->height[l];
+ offset += mt->level[l].pitch * u_minify(pt->height0, l);
}
mt->level[l].image_offset[f] = offset;
- offset += mt->level[l].pitch * pt->height[l];
+ offset += mt->level[l].pitch * u_minify(pt->height0, l);
}
mt->total_size = offset;
@@ -79,8 +76,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
mt->base.screen = pscreen;
/* Swizzled textures must be POT */
- if (pt->width[0] & (pt->width[0] - 1) ||
- pt->height[0] & (pt->height[0] - 1))
+ if (pt->width0 & (pt->width0 - 1) ||
+ pt->height0 & (pt->height0 - 1))
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
else
if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -109,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+ /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+ * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+ * This also happens for small mipmaps of large textures. */
+ if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
nv40_miptree_layout(mt);
mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
@@ -116,7 +119,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
FREE(mt);
return NULL;
}
-
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -128,7 +131,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv40_miptree);
@@ -141,7 +144,11 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->level[0].pitch = stride[0];
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+ /* Assume whoever created this buffer expects it to be linear for now */
+ mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
pipe_buffer_reference(&mt->buffer, pb);
+ mt->bo = nouveau_bo(mt->buffer);
return &mt->base;
}
@@ -173,8 +180,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&ns->base.texture, pt);
ns->base.format = pt->format;
- ns->base.width = pt->width[level];
- ns->base.height = pt->height[level];
+ ns->base.width = u_minify(pt->width0, level);
+ ns->base.height = u_minify(pt->height0, level);
ns->base.usage = flags;
pipe_reference_init(&ns->base.reference, 1);
ns->base.face = face;
@@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ns->base.offset = mt->level[level].image_offset[0];
}
+ /* create a linear temporary that we can render into if necessary.
+ * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+ * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+ return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
+
return &ns->base;
}
static void
nv40_miptree_surface_del(struct pipe_surface *ps)
{
+ struct nv04_surface* ns = (struct nv04_surface*)ps;
+ if(ns->backing)
+ {
+ struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
+ if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+ screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+ nv40_miptree_surface_del(&ns->backing->base);
+ }
+
pipe_texture_reference(&ps->texture, NULL);
FREE(ps);
}
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
index 7874aedd428..8ed4a67dd03 100644
--- a/src/gallium/drivers/nv40/nv40_query.c
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_query *q = nv40_query(pq);
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
@@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
assert(0);
nouveau_notifier_reset(nv40->screen->query, q->object->start);
- BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
- OUT_RING (1);
- BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1);
+ OUT_RING (chan, 1);
q->ready = FALSE;
}
@@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_query *q = nv40_query(pq);
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
- BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
- OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+ BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1);
+ OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
- FIRE_RING(NULL);
+ FIRE_RING(chan);
}
static boolean
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index bd13dfddd1c..9e55e5a089c 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -140,6 +140,12 @@ static void
nv40_screen_destroy(struct pipe_screen *pscreen)
{
struct nv40_screen *screen = nv40_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < NV40_STATE_MAX; i++) {
+ if (screen->state[i])
+ so_ref(NULL, &screen->state[i]);
+ }
nouveau_resource_free(&screen->vp_exec_heap);
nouveau_resource_free(&screen->vp_data_heap);
@@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(&screen->query);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->curie);
+ nv04_surface_2d_takedown(&screen->eng2d);
nouveau_screen_fini(&screen->base);
@@ -208,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
return FALSE;
}
- BIND_RING(chan, screen->curie, 7);
/* 2D engine setup */
screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -245,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static curie initialisation */
- so = so_new(128, 0);
+ so = so_new(16, 25, 0);
so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index c3ee4d23453..ed0ca9e02c3 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe,
struct nv40_context *nv40 = nv40_context(pipe);
struct nouveau_grobj *curie = nv40->screen->curie;
struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
- struct nouveau_stateobj *so = so_new(16, 0);
+ struct nouveau_stateobj *so = so_new(5, 8, 0);
if (cso->blend_enable) {
so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
@@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(8, 18, 0);
struct nouveau_grobj *curie = nv40->screen->curie;
/*XXX: ignored:
@@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
- struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_stateobj *so = so_new(4, 21, 0);
struct nouveau_grobj *curie = nv40->screen->curie;
so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
@@ -687,16 +687,6 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
-static void
-nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
- struct nv40_context *nv40 = nv40_context(pipe);
-
- nv40->edgeflags = bitfield;
- nv40->dirty |= NV40_NEW_ARRAYS;
- nv40->draw_dirty |= NV40_NEW_ARRAYS;
-}
-
void
nv40_init_state_functions(struct nv40_context *nv40)
{
@@ -705,9 +695,9 @@ nv40_init_state_functions(struct nv40_context *nv40)
nv40->pipe.delete_blend_state = nv40_blend_state_delete;
nv40->pipe.create_sampler_state = nv40_sampler_state_create;
- nv40->pipe.bind_sampler_states = nv40_sampler_state_bind;
+ nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind;
nv40->pipe.delete_sampler_state = nv40_sampler_state_delete;
- nv40->pipe.set_sampler_textures = nv40_set_sampler_texture;
+ nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture;
nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create;
nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind;
@@ -736,7 +726,6 @@ nv40_init_state_functions(struct nv40_context *nv40)
nv40->pipe.set_scissor_state = nv40_set_scissor_state;
nv40->pipe.set_viewport_state = nv40_set_viewport_state;
- nv40->pipe.set_edgeflags = nv40_set_edgeflags;
nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index 8a9d8c8fdf6..192074e7471 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -75,6 +75,7 @@ struct nv40_fragment_program {
struct nv40_miptree {
struct pipe_texture base;
+ struct nouveau_bo *bo;
struct pipe_buffer *buffer;
uint total_size;
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
index 8cd05ce66ef..3ff00a37f66 100644
--- a/src/gallium/drivers/nv40/nv40_state_blend.c
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = {
static boolean
nv40_state_blend_colour_validate(struct nv40_context *nv40)
{
- struct nouveau_stateobj *so = so_new(2, 0);
+ struct nouveau_stateobj *so = so_new(1, 1, 0);
struct pipe_blend_color *bcol = &nv40->blend_colour;
so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 198692965dc..13fe854915b 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -54,10 +54,11 @@ nv40_state_do_validate(struct nv40_context *nv40,
void
nv40_state_emit(struct nv40_context *nv40)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
- unsigned i, samplers;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
+ unsigned i;
uint64_t states;
if (nv40->pctx_id != screen->cur_pctx) {
@@ -80,13 +81,21 @@ nv40_state_emit(struct nv40_context *nv40)
if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
(1ULL << NV40_STATE_FRAGTEX0))) {
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (2);
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (1);
+ BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (chan, 1);
}
state->dirty = 0;
+}
+
+void
+nv40_state_flush_notify(struct nouveau_channel *chan)
+{
+ struct nv40_context *nv40 = chan->user_private;
+ struct nv40_state *state = &nv40->state;
+ unsigned i, samplers;
so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
@@ -160,7 +169,6 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
draw_set_viewport_state(draw, &nv40->viewport);
if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
- draw_set_edgeflags(draw, nv40->edgeflags);
draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
}
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
index c2f739157ad..a58fe9ddb19 100644
--- a/src/gallium/drivers/nv40/nv40_state_fb.c
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40)
struct nv04_surface *rt[4], *zeta;
uint32_t rt_enable, rt_format;
int i, colour_format = 0, zeta_format = 0;
- struct nouveau_stateobj *so = so_new(64, 10);
+ struct nouveau_stateobj *so = so_new(18, 24, 10);
unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
unsigned w = fb->width;
unsigned h = fb->height;
@@ -57,6 +57,9 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40)
rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
switch (colour_format) {
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
+ break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
case 0:
rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
index cf58d33906a..753a505e934 100644
--- a/src/gallium/drivers/nv40/nv40_state_scissor.c
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40)
return FALSE;
nv40->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
+ so = so_new(1, 2, 0);
so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
if (nv40->state.scissor_enabled) {
so_data (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
index b51024ad9b2..2b371ebfec0 100644
--- a/src/gallium/drivers/nv40/nv40_state_stipple.c
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40)
if (rast->poly_stipple_enable) {
unsigned i;
- so = so_new(35, 0);
+ so = so_new(2, 33, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 1);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
so_data(so, nv40->stipple[i]);
} else {
- so = so_new(2, 0);
+ so = so_new(1, 1, 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, 0);
}
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
index 665d2d5fcac..9919ba1d0b0 100644
--- a/src/gallium/drivers/nv40/nv40_state_viewport.c
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40)
return FALSE;
nv40->state.viewport_bypass = bypass;
- so = so_new(11, 0);
+ so = so_new(2, 9, 0);
if (!bypass) {
so_method(so, nv40->screen->curie,
NV40TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 92caee6f382..791ee6823d3 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -1,7 +1,9 @@
#include <pipe/p_state.h>
#include <pipe/p_defines.h>
#include <pipe/p_inlines.h>
+#include <util/u_format.h>
#include <util/u_memory.h>
+#include <util/u_math.h>
#include <nouveau/nouveau_winsys.h>
#include "nv40_context.h"
#include "nv40_screen.h"
@@ -10,22 +12,19 @@
struct nv40_transfer {
struct pipe_transfer base;
struct pipe_surface *surface;
- bool direct;
+ boolean direct;
};
static void
-nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
struct pipe_texture *template)
{
memset(template, 0, sizeof(struct pipe_texture));
template->target = pt->target;
template->format = pt->format;
- template->width[0] = pt->width[level];
- template->height[0] = pt->height[level];
- template->depth[0] = 1;
- template->block = pt->block;
- template->nblocksx[0] = pt->nblocksx[level];
- template->nblocksy[0] = pt->nblocksx[level];
+ template->width0 = width;
+ template->height0 = height;
+ template->depth0 = 1;
template->last_level = 0;
template->nr_samples = pt->nr_samples;
@@ -48,14 +47,10 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
tx->base.x = x;
tx->base.y = y;
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
tx->base.stride = mt->level[level].pitch;
tx->base.usage = usage;
tx->base.face = face;
@@ -76,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
tx->direct = false;
- nv40_compatible_transfer_tex(pt, level, &tx_tex_template);
+ nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template);
tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
if (!tx_tex)
@@ -85,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
return NULL;
}
+ tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch;
+
tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
0, 0, 0,
pipe_transfer_buffer_flags(&tx->base));
@@ -110,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
/* TODO: Check if SIFM can un-swizzle */
nvscreen->eng2d->copy(nvscreen->eng2d,
tx->surface, 0, 0,
- src, 0, 0,
- src->width, src->height);
+ src, x, y,
+ w, h);
pipe_surface_reference(&src, NULL);
}
@@ -131,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx)
dst = pscreen->get_tex_surface(pscreen, ptx->texture,
ptx->face, ptx->level, ptx->zslice,
- PIPE_BUFFER_USAGE_GPU_WRITE);
+ PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
nvscreen->eng2d->copy(nvscreen->eng2d,
- dst, 0, 0,
+ dst, tx->base.x, tx->base.y,
tx->surface, 0, 0,
- dst->width, dst->height);
+ tx->base.width, tx->base.height);
pipe_surface_reference(&dst, NULL);
}
@@ -156,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
void *map = pipe_buffer_map(pscreen, mt->buffer,
pipe_transfer_buffer_flags(ptx));
- return map + ns->base.offset +
- ptx->y * ns->pitch + ptx->x * ptx->block.size;
+ if(!tx->direct)
+ return map + ns->base.offset;
+ else
+ return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
}
static void
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index b2753b8e2e0..a777898f688 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
return TRUE;
}
-boolean
+void
nv40_draw_arrays(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned restart;
nv40_vbo_set_idxbuf(nv40, NULL, 0);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
while (count) {
@@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static INLINE void
nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -277,7 +281,9 @@ static INLINE void
nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
if (vc & 1) {
- BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
- OUT_RING (elts[0]);
+ BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+ OUT_RING (chan, elts[0]);
elts++; vc--;
}
@@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
push = MIN2(vc, 2047 * 2);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
for (i = 0; i < push; i+=2)
- OUT_RING((elts[i+1] << 16) | elts[i]);
+ OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
@@ -326,7 +332,9 @@ static INLINE void
nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
unsigned mode, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
while (count) {
uint32_t *elts = (uint32_t *)ib + start;
@@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
mode, start, count, &restart);
if (vc == 0) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
count -= vc;
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
while (vc) {
push = MIN2(vc, 2047);
- BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
- OUT_RINGp (elts, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push);
+ OUT_RINGp (chan, elts, push);
vc -= push;
elts += push;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
start = restart;
}
}
-static boolean
+static void
nv40_draw_elements_inline(struct pipe_context *pipe,
struct pipe_buffer *ib, unsigned ib_size,
unsigned mode, unsigned start, unsigned count)
@@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
}
pipe_buffer_unmap(pscreen, ib);
- return TRUE;
}
-static boolean
+static void
nv40_draw_elements_vbo(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
- struct nouveau_channel *chan = nv40->screen->base.channel;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
unsigned restart;
while (count) {
@@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
mode, start, count, &restart);
if (!vc) {
- FIRE_RING(NULL);
+ FIRE_RING(chan);
continue;
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (nvgl_primitive(mode));
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, nvgl_primitive(mode));
nr = (vc & 0xff);
if (nr) {
- BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
- OUT_RING (((nr - 1) << 24) | start);
+ BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1);
+ OUT_RING (chan, ((nr - 1) << 24) | start);
start += nr;
}
@@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
nr -= push;
- BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+ BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push);
while (push--) {
- OUT_RING(((0x100 - 1) << 24) | start);
+ OUT_RING(chan, ((0x100 - 1) << 24) | start);
start += 0x100;
}
}
- BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
- OUT_RING (0);
+ BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (chan, 0);
count -= vc;
start = restart;
}
-
- return TRUE;
}
-boolean
+void
nv40_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe,
idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
- return nv40_draw_elements_swtnl(pipe, NULL, 0,
- mode, start, count);
+ nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ return;
}
if (idxbuf) {
@@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe,
}
pipe->flush(pipe, 0, NULL);
- return TRUE;
}
static boolean
@@ -484,14 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40)
unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
int hw;
- if (nv40->edgeflags) {
- nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
- return FALSE;
- }
-
- vtxbuf = so_new(20, 18);
+ vtxbuf = so_new(3, 17, 18);
so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
- vtxfmt = so_new(17, 0);
+ vtxfmt = so_new(1, 16, 0);
so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
@@ -504,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40)
if (!vb->stride) {
if (!sattr)
- sattr = so_new(16 * 5, 0);
+ sattr = so_new(16, 16 * 4, 0);
if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 0382dbba8f6..8d80fcad38e 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -295,30 +295,30 @@ static INLINE struct nv40_sreg
tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
struct nv40_sreg src;
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+ src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index);
break;
case TGSI_FILE_CONSTANT:
- src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+ src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
break;
case TGSI_FILE_IMMEDIATE:
- src = vpc->imm[fsrc->SrcRegister.Index];
+ src = vpc->imm[fsrc->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- src = vpc->r_temp[fsrc->SrcRegister.Index];
+ src = vpc->r_temp[fsrc->Register.Index];
break;
default:
NOUVEAU_ERR("bad src file\n");
break;
}
- src.abs = fsrc->SrcRegisterExtMod.Absolute;
- src.negate = fsrc->SrcRegister.Negate;
- src.swz[0] = fsrc->SrcRegister.SwizzleX;
- src.swz[1] = fsrc->SrcRegister.SwizzleY;
- src.swz[2] = fsrc->SrcRegister.SwizzleZ;
- src.swz[3] = fsrc->SrcRegister.SwizzleW;
+ src.abs = fsrc->Register.Absolute;
+ src.negate = fsrc->Register.Negate;
+ src.swz[0] = fsrc->Register.SwizzleX;
+ src.swz[1] = fsrc->Register.SwizzleY;
+ src.swz[2] = fsrc->Register.SwizzleZ;
+ src.swz[3] = fsrc->Register.SwizzleW;
return src;
}
@@ -326,15 +326,15 @@ static INLINE struct nv40_sreg
tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nv40_sreg dst;
- switch (fdst->DstRegister.File) {
+ switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
- dst = vpc->r_result[fdst->DstRegister.Index];
+ dst = vpc->r_result[fdst->Register.Index];
break;
case TGSI_FILE_TEMPORARY:
- dst = vpc->r_temp[fdst->DstRegister.Index];
+ dst = vpc->r_temp[fdst->Register.Index];
break;
case TGSI_FILE_ADDRESS:
- dst = vpc->r_address[fdst->DstRegister.Index];
+ dst = vpc->r_address[fdst->Register.Index];
break;
default:
NOUVEAU_ERR("bad dst file\n");
@@ -362,38 +362,23 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
{
const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
- uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
- uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
- fsrc->SrcRegisterExtSwz.NegateY,
- fsrc->SrcRegisterExtSwz.NegateZ,
- fsrc->SrcRegisterExtSwz.NegateW };
+ uint mask = 0;
uint c;
for (c = 0; c < 4; c++) {
- switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
mask |= tgsi_mask(1 << c);
break;
- case TGSI_EXTSWIZZLE_ZERO:
- zero_mask |= tgsi_mask(1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
- case TGSI_EXTSWIZZLE_ONE:
- one_mask |= tgsi_mask(1 << c);
- tgsi.swz[c] = SWZ_X;
- break;
default:
assert(0);
}
-
- if (!tgsi.negate && neg[c])
- neg_mask |= tgsi_mask(1 << c);
}
- if (mask == MASK_ALL && !neg_mask)
+ if (mask == MASK_ALL)
return TRUE;
*src = temp(vpc);
@@ -401,18 +386,6 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
if (mask)
arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
- if (zero_mask)
- arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
-
- if (one_mask)
- arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
-
- if (neg_mask) {
- struct nv40_sreg one = temp(vpc);
- arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
- arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
- }
-
return FALSE;
}
@@ -432,8 +405,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
- if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ fsrc = &finst->Src[i];
+ if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
src[i] = tgsi_src(vpc, fsrc);
}
}
@@ -441,9 +414,9 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
- fsrc = &finst->FullSrcRegisters[i];
+ fsrc = &finst->Src[i];
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
@@ -454,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
break;
}
- switch (fsrc->SrcRegister.File) {
+ switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- if (ai == -1 || ai == fsrc->SrcRegister.Index) {
- ai = fsrc->SrcRegister.Index;
+ if (ai == -1 || ai == fsrc->Register.Index) {
+ ai = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -467,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
break;
case TGSI_FILE_CONSTANT:
if ((ci == -1 && ii == -1) ||
- ci == fsrc->SrcRegister.Index) {
- ci = fsrc->SrcRegister.Index;
+ ci == fsrc->Register.Index) {
+ ci = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -478,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
break;
case TGSI_FILE_IMMEDIATE:
if ((ci == -1 && ii == -1) ||
- ii == fsrc->SrcRegister.Index) {
- ii = fsrc->SrcRegister.Index;
+ ii == fsrc->Register.Index) {
+ ii = fsrc->Register.Index;
src[i] = tgsi_src(vpc, fsrc);
} else {
src[i] = temp(vpc);
@@ -496,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
}
}
- dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
- mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ dst = tgsi_dst(vpc, &finst->Dst[0]);
+ mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
@@ -604,19 +577,19 @@ static boolean
nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
const struct tgsi_full_declaration *fdec)
{
- unsigned idx = fdec->DeclarationRange.First;
+ unsigned idx = fdec->Range.First;
int hw;
- switch (fdec->Semantic.SemanticName) {
+ switch (fdec->Semantic.Name) {
case TGSI_SEMANTIC_POSITION:
hw = NV40_VP_INST_DEST_POS;
vpc->hpos_idx = idx;
break;
case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV40_VP_INST_DEST_COL0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV40_VP_INST_DEST_COL1;
} else {
NOUVEAU_ERR("bad colour semantic index\n");
@@ -624,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
}
break;
case TGSI_SEMANTIC_BCOLOR:
- if (fdec->Semantic.SemanticIndex == 0) {
+ if (fdec->Semantic.Index == 0) {
hw = NV40_VP_INST_DEST_BFC0;
} else
- if (fdec->Semantic.SemanticIndex == 1) {
+ if (fdec->Semantic.Index == 1) {
hw = NV40_VP_INST_DEST_BFC1;
} else {
NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -641,13 +614,17 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
hw = NV40_VP_INST_DEST_PSZ;
break;
case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.SemanticIndex <= 7) {
- hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+ if (fdec->Semantic.Index <= 7) {
+ hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index);
} else {
NOUVEAU_ERR("bad generic semantic index\n");
return FALSE;
}
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ /* not really an error just a fallback */
+ NOUVEAU_ERR("cannot handle edgeflag output\n");
+ return FALSE;
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
@@ -679,16 +656,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
fdec = &p.FullToken.FullDeclaration;
switch (fdec->Declaration.File) {
case TGSI_FILE_TEMPORARY:
- if (fdec->DeclarationRange.Last > high_temp) {
+ if (fdec->Range.Last > high_temp) {
high_temp =
- fdec->DeclarationRange.Last;
+ fdec->Range.Last;
}
break;
#if 0 /* this would be nice.. except gallium doesn't track it */
case TGSI_FILE_ADDRESS:
- if (fdec->DeclarationRange.Last > high_addr) {
+ if (fdec->Range.Last > high_addr) {
high_addr =
- fdec->DeclarationRange.Last;
+ fdec->Range.Last;
}
break;
#endif
@@ -708,11 +685,11 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
const struct tgsi_full_dst_register *fdst;
finst = &p.FullToken.FullInstruction;
- fdst = &finst->FullDstRegisters[0];
+ fdst = &finst->Dst[0];
- if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
- if (fdst->DstRegister.Index > high_addr)
- high_addr = fdst->DstRegister.Index;
+ if (fdst->Register.File == TGSI_FILE_ADDRESS) {
+ if (fdst->Register.Index > high_addr)
+ high_addr = fdst->Register.Index;
}
}
@@ -857,7 +834,9 @@ static boolean
nv40_vertprog_validate(struct nv40_context *nv40)
{
struct pipe_screen *pscreen = nv40->pipe.screen;
- struct nouveau_grobj *curie = nv40->screen->curie;
+ struct nv40_screen *screen = nv40->screen;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *curie = screen->curie;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -907,7 +886,7 @@ check_gpu_resources:
assert(0);
}
- so = so_new(7, 0);
+ so = so_new(3, 4, 0);
so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
@@ -997,9 +976,9 @@ check_gpu_resources:
4 * sizeof(float));
}
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
- OUT_RING (i + vp->data->start);
- OUT_RINGp ((uint32_t *)vpd->value, 4);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+ OUT_RING (chan, i + vp->data->start);
+ OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
}
if (constbuf)
@@ -1016,11 +995,11 @@ check_gpu_resources:
NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
}
#endif
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
- OUT_RING (vp->exec->start);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+ OUT_RING (chan, vp->exec->start);
for (i = 0; i < vp->nr_insns; i++) {
- BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
- OUT_RINGp (vp->insns[i].data, 4);
+ BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+ OUT_RINGp (chan, vp->insns[i].data, 4);
}
}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index fca078b174a..5997456e4c9 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -33,13 +33,6 @@ nv50_flush(struct pipe_context *pipe, unsigned flags,
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *eng2d = nv50->screen->eng2d;
-
- /* We need this in the ddx for reliable composite, not sure what we're
- * actually flushing. We generate all our own flushes with flags = 0. */
- WAIT_RING(chan, 2);
- BEGIN_RING(chan, eng2d, 0x0110, 1);
- OUT_RING (chan, 0);
if (flags & PIPE_FLUSH_FRAME)
FIRE_RING(chan);
@@ -50,39 +43,44 @@ nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ if (nv50->state.fb)
+ so_ref(NULL, &nv50->state.fb);
+ if (nv50->state.blend)
+ so_ref(NULL, &nv50->state.blend);
+ if (nv50->state.blend_colour)
+ so_ref(NULL, &nv50->state.blend_colour);
+ if (nv50->state.zsa)
+ so_ref(NULL, &nv50->state.zsa);
+ if (nv50->state.rast)
+ so_ref(NULL, &nv50->state.rast);
+ if (nv50->state.stipple)
+ so_ref(NULL, &nv50->state.stipple);
+ if (nv50->state.scissor)
+ so_ref(NULL, &nv50->state.scissor);
+ if (nv50->state.viewport)
+ so_ref(NULL, &nv50->state.viewport);
+ if (nv50->state.tsc_upload)
+ so_ref(NULL, &nv50->state.tsc_upload);
+ if (nv50->state.tic_upload)
+ so_ref(NULL, &nv50->state.tic_upload);
+ if (nv50->state.vertprog)
+ so_ref(NULL, &nv50->state.vertprog);
+ if (nv50->state.fragprog)
+ so_ref(NULL, &nv50->state.fragprog);
+ if (nv50->state.programs)
+ so_ref(NULL, &nv50->state.programs);
+ if (nv50->state.vtxfmt)
+ so_ref(NULL, &nv50->state.vtxfmt);
+ if (nv50->state.vtxbuf)
+ so_ref(NULL, &nv50->state.vtxbuf);
+ if (nv50->state.vtxattr)
+ so_ref(NULL, &nv50->state.vtxattr);
+
draw_destroy(nv50->draw);
FREE(nv50);
}
-static void
-nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
-static unsigned int
-nv50_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv50_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
-{
- /**
- * FIXME: Optimize.
- */
-
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
{
@@ -101,15 +99,14 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv50->pipe.destroy = nv50_destroy;
- nv50->pipe.set_edgeflags = nv50_set_edgeflags;
nv50->pipe.draw_arrays = nv50_draw_arrays;
nv50->pipe.draw_elements = nv50_draw_elements;
nv50->pipe.clear = nv50_clear;
nv50->pipe.flush = nv50_flush;
- nv50->pipe.is_texture_referenced = nv50_is_texture_referenced;
- nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced;
+ nv50->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+ nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
screen->base.channel->user_private = nv50;
screen->base.channel->flush_notify = nv50_state_flush_notify;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 4608854d711..cbd4c3ff86d 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -14,6 +14,7 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_gldefs.h"
#include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_context.h"
#include "nv50_screen.h"
#include "nv50_program.h"
@@ -64,10 +65,22 @@ struct nv50_rasterizer_stateobj {
};
struct nv50_sampler_stateobj {
- bool normalized;
+ boolean normalized;
unsigned tsc[8];
};
+static INLINE unsigned
+get_tile_height(uint32_t tile_mode)
+{
+ return 1 << ((tile_mode & 0xf) + 2);
+}
+
+static INLINE unsigned
+get_tile_depth(uint32_t tile_mode)
+{
+ return 1 << (tile_mode >> 4);
+}
+
struct nv50_miptree_level {
int *image_offset;
unsigned pitch;
@@ -113,13 +126,14 @@ struct nv50_state {
unsigned viewport_bypass;
struct nouveau_stateobj *tsc_upload;
struct nouveau_stateobj *tic_upload;
- unsigned miptree_nr;
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
struct nouveau_stateobj *programs;
struct nouveau_stateobj *vtxfmt;
struct nouveau_stateobj *vtxbuf;
struct nouveau_stateobj *vtxattr;
+ unsigned vtxelt_nr;
};
struct nv50_context {
@@ -148,10 +162,12 @@ struct nv50_context {
unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
unsigned vtxelt_nr;
- struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
- unsigned sampler_nr;
- struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
- unsigned miptree_nr;
+ struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr[PIPE_SHADER_TYPES];
+ struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+ unsigned miptree_nr[PIPE_SHADER_TYPES];
+
+ uint16_t vbo_fifo;
};
static INLINE struct nv50_context *
@@ -175,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
/* nv50_vbo.c */
-extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv50_draw_arrays(struct pipe_context *, unsigned mode,
unsigned start, unsigned count);
-extern boolean nv50_draw_elements(struct pipe_context *pipe,
+extern void nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start,
@@ -192,13 +208,27 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
extern void nv50_vertprog_validate(struct nv50_context *nv50);
extern void nv50_fragprog_validate(struct nv50_context *nv50);
extern void nv50_linkage_validate(struct nv50_context *nv50);
-extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+extern void nv50_program_destroy(struct nv50_context *nv50,
+ struct nv50_program *p);
/* nv50_state_validate.c */
extern boolean nv50_state_validate(struct nv50_context *nv50);
extern void nv50_state_flush_notify(struct nouveau_channel *chan);
+extern void nv50_so_init_sifc(struct nv50_context *nv50,
+ struct nouveau_stateobj *so,
+ struct nouveau_bo *bo, unsigned reloc,
+ unsigned offset, unsigned size);
+
/* nv50_tex.c */
extern void nv50_tex_validate(struct nv50_context *);
+/* nv50_transfer.c */
+extern void
+nv50_upload_sifc(struct nv50_context *nv50,
+ struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+ unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+ void *src, unsigned src_format, int src_pitch,
+ int x, int y, int w, int h, int cpp);
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 93479a0314a..3f1edf0a139 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -23,18 +23,62 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
#include "nv50_context.h"
+/* The restrictions in tile mode selection probably aren't necessary. */
+static INLINE uint32_t
+get_tile_mode(unsigned ny, unsigned d)
+{
+ uint32_t tile_mode = 0x00;
+
+ if (ny > 32) tile_mode = 0x04; /* height 64 tiles */
+ else
+ if (ny > 16) tile_mode = 0x03; /* height 32 tiles */
+ else
+ if (ny > 8) tile_mode = 0x02; /* height 16 tiles */
+ else
+ if (ny > 4) tile_mode = 0x01; /* height 8 tiles */
+
+ if (d == 1)
+ return tile_mode;
+ else
+ if (tile_mode > 0x02)
+ tile_mode = 0x02;
+
+ if (d > 16 && tile_mode < 0x02)
+ return tile_mode | 0x50; /* depth 32 tiles */
+ if (d > 8) return tile_mode | 0x40; /* depth 16 tiles */
+ if (d > 4) return tile_mode | 0x30; /* depth 8 tiles */
+ if (d > 2) return tile_mode | 0x20; /* depth 4 tiles */
+
+ return tile_mode | 0x10;
+}
+
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+ unsigned tile_h = get_tile_height(tile_mode);
+ unsigned tile_d = get_tile_depth(tile_mode);
+
+ /* pitch_2d == to next slice within this volume-tile */
+ /* pitch_3d == size (in bytes) of a volume-tile */
+ unsigned pitch_2d = tile_h * 64;
+ unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+ return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
static struct pipe_texture *
nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
{
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_texture *pt = &mt->base.base;
- unsigned width = tmp->width[0], height = tmp->height[0];
- unsigned depth = tmp->depth[0];
- uint32_t tile_mode, tile_flags, tile_h;
+ unsigned width = tmp->width0, height = tmp->height0;
+ unsigned depth = tmp->depth0, image_alignment;
+ uint32_t tile_flags;
int ret, i, l;
*pt = *tmp;
@@ -57,68 +101,52 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
break;
}
- if (pt->height[0] > 32) tile_mode = 4;
- else if (pt->height[0] > 16) tile_mode = 3;
- else if (pt->height[0] > 8) tile_mode = 2;
- else if (pt->height[0] > 4) tile_mode = 1;
- else tile_mode = 0;
- tile_h = 1 << (tile_mode + 2);
-
- switch (pt->target) {
- case PIPE_TEXTURE_3D:
- mt->image_nr = pt->depth[0];
- break;
- case PIPE_TEXTURE_CUBE:
- mt->image_nr = 6;
- break;
- default:
- mt->image_nr = 1;
- break;
- }
+ /* XXX: texture arrays */
+ mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
for (l = 0; l <= pt->last_level; l++) {
struct nv50_miptree_level *lvl = &mt->level[l];
-
- pt->width[l] = width;
- pt->height[l] = height;
- pt->depth[l] = depth;
- pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+ unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
- lvl->pitch = align(pt->width[l] * pt->block.size, 64);
- lvl->tile_mode = tile_mode;
-
- width = MAX2(1, width >> 1);
- height = MAX2(1, height >> 1);
- depth = MAX2(1, depth >> 1);
+ lvl->pitch = align(util_format_get_stride(pt->format, width), 64);
+ lvl->tile_mode = get_tile_mode(nblocksy, depth);
- if (tile_mode && height <= (tile_h >> 1)) {
- tile_mode--;
- tile_h >>= 1;
- }
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
}
+ image_alignment = get_tile_height(mt->level[0].tile_mode) * 64;
+ image_alignment *= get_tile_depth(mt->level[0].tile_mode);
+
+ /* NOTE the distinction between arrays of mip-mapped 2D textures and
+ * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip.
+ */
for (i = 0; i < mt->image_nr; i++) {
for (l = 0; l <= pt->last_level; l++) {
struct nv50_miptree_level *lvl = &mt->level[l];
int size;
- tile_h = 1 << (lvl->tile_mode + 2);
+ unsigned tile_h = get_tile_height(lvl->tile_mode);
+ unsigned tile_d = get_tile_depth(lvl->tile_mode);
- size = align(pt->width[l], 8) * pt->block.size;
- size = align(size, 64);
- size *= align(pt->height[l], tile_h);
+ size = lvl->pitch;
+ size *= align(util_format_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
+ size *= align(u_minify(pt->depth0, l), tile_d);
lvl->image_offset[i] = mt->total_size;
mt->total_size += size;
}
+ mt->total_size = align(mt->total_size, image_alignment);
}
ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size,
mt->level[0].tile_mode, tile_flags,
&mt->base.bo);
if (ret) {
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
FREE(mt);
return NULL;
}
@@ -135,7 +163,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
/* Only supports 2D, non-mipmapped textures for the moment */
if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
- pt->depth[0] != 1)
+ pt->depth0 != 1)
return NULL;
mt = CALLOC_STRUCT(nv50_miptree);
@@ -158,6 +186,10 @@ static void
nv50_miptree_destroy(struct pipe_texture *pt)
{
struct nv50_miptree *mt = nv50_miptree(pt);
+ unsigned l;
+
+ for (l = 0; l < pt->last_level; ++l)
+ FREE(mt->level[l].image_offset);
nouveau_bo_ref(NULL, &mt->base.bo);
FREE(mt);
@@ -171,23 +203,18 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
struct pipe_surface *ps;
- int img;
+ unsigned img = 0;
if (pt->target == PIPE_TEXTURE_CUBE)
img = face;
- else
- if (pt->target == PIPE_TEXTURE_3D)
- img = zslice;
- else
- img = 0;
ps = CALLOC_STRUCT(pipe_surface);
if (!ps)
return NULL;
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->usage = flags;
pipe_reference_init(&ps->reference, 1);
ps->face = face;
@@ -195,6 +222,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ps->zslice = zslice;
ps->offset = lvl->image_offset[img];
+ if (pt->target == PIPE_TEXTURE_3D) {
+ unsigned nb_h = util_format_get_nblocksy(pt->format, ps->height);
+ ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+ lvl->pitch, nb_h);
+ }
+
return ps;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 576d075318f..53f9f0adf31 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -31,9 +31,12 @@
#include "nv50_context.h"
-#define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_TEMP 127
+#define NV50_SU_MAX_ADDR 4
//#define NV50_PROGRAM_DUMP
+/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */
+
/* ARL - gallium craps itself on progs/vp/arl.txt
*
* MSB - Like MAD, but MUL+SUB
@@ -79,26 +82,44 @@ struct nv50_reg {
P_ATTR,
P_RESULT,
P_CONST,
- P_IMMD
+ P_IMMD,
+ P_ADDR
} type;
int index;
int hw;
- int neg;
+ int mod;
int rhw; /* result hw for FP outputs, or interpolant index */
int acc; /* instruction where this reg is last read (first insn == 1) */
};
-/* arbitrary limits */
-#define MAX_IF_DEPTH 4
-#define MAX_LOOP_DEPTH 4
+#define NV50_MOD_NEG 1
+#define NV50_MOD_ABS 2
+#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS)
+#define NV50_MOD_SAT 4
+#define NV50_MOD_I32 8
+
+/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */
+
+/* STACK: Conditionals and loops have to use the (per warp) stack.
+ * Stack entries consist of an entry type (divergent path, join at),
+ * a mask indicating the active threads of the warp, and an address.
+ * MPs can store 12 stack entries internally, if we need more (and
+ * we probably do), we have to create a stack buffer in VRAM.
+ */
+/* impose low limits for now */
+#define NV50_MAX_COND_NESTING 4
+#define NV50_MAX_LOOP_NESTING 3
+
+#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2
struct nv50_pc {
struct nv50_program *p;
/* hw resources */
struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+ struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
/* tgsi resources */
struct nv50_reg *temp;
@@ -110,8 +131,11 @@ struct nv50_pc {
struct nv50_reg *param;
int param_nr;
struct nv50_reg *immd;
- float *immd_buf;
+ uint32_t *immd_buf;
int immd_nr;
+ struct nv50_reg **addr;
+ int addr_nr;
+ uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
struct nv50_reg *temp_temp[16];
unsigned temp_temp_nr;
@@ -120,23 +144,30 @@ struct nv50_pc {
struct nv50_reg *r_brdc;
struct nv50_reg *r_dst[4];
+ struct nv50_reg reg_instances[16];
+ unsigned reg_instance_nr;
+
unsigned interp_mode[32];
/* perspective interpolation registers */
struct nv50_reg *iv_p;
struct nv50_reg *iv_c;
- struct nv50_program_exec *if_cond;
- struct nv50_program_exec *if_insn[MAX_IF_DEPTH];
- struct nv50_program_exec *br_join[MAX_IF_DEPTH];
- struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */
+ struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING];
+ struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING];
+ struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING];
int if_lvl, loop_lvl;
- unsigned loop_pos[MAX_LOOP_DEPTH];
+ unsigned loop_pos[NV50_MAX_LOOP_NESTING];
+
+ unsigned *insn_pos; /* actual program offset of each TGSI insn */
+ boolean in_subroutine;
/* current instruction and total number of insns */
unsigned insn_cur;
unsigned insn_nr;
boolean allow32;
+
+ uint8_t edgeflag_out;
};
static INLINE void
@@ -145,7 +176,7 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
reg->type = type;
reg->index = index;
reg->hw = hw;
- reg->neg = 0;
+ reg->mod = 0;
reg->rhw = -1;
reg->acc = 0;
}
@@ -159,6 +190,16 @@ popcnt4(uint32_t val)
}
static void
+terminate_mbb(struct nv50_pc *pc)
+{
+ int i;
+
+ /* remove records of temporary address register values */
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+ pc->r_addr[i].rhw = -1;
+}
+
+static void
alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
{
int i = 0;
@@ -207,6 +248,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
assert(0);
}
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+ struct nv50_reg *ri;
+
+ assert(pc->reg_instance_nr < 16);
+ ri = &pc->reg_instances[pc->reg_instance_nr++];
+ if (reg) {
+ alloc_reg(pc, reg);
+ *ri = *reg;
+ reg->mod = 0;
+ }
+ return ri;
+}
+
/* XXX: For shaders that aren't executed linearly (e.g. shaders that
* contain loops), we need to assign all hw regs to TGSI TEMPs early,
* lest we risk temp_temps overwriting regs alloc'd "later".
@@ -233,22 +289,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
return NULL;
}
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- assert(src->index == -1 && src->hw != -1);
-
- if (dst->hw != -1)
- pc->r_temp[dst->hw] = NULL;
- pc->r_temp[src->hw] = dst;
- dst->hw = src->hw;
-
- FREE(src);
-}
-
/* release the hardware resource held by r */
static void
release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -320,25 +360,34 @@ static void
kill_temp_temp(struct nv50_pc *pc)
{
int i;
-
+
for (i = 0; i < pc->temp_temp_nr; i++)
free_temp(pc, pc->temp_temp[i]);
pc->temp_temp_nr = 0;
}
static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+ uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
- pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
- (pc->immd_nr + 1) * 4 * sizeof(float));
+ unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+ pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
+
return pc->immd_nr++;
}
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+ return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
+}
+
static struct nv50_reg *
alloc_immd(struct nv50_pc *pc, float f)
{
@@ -346,11 +395,11 @@ alloc_immd(struct nv50_pc *pc, float f)
unsigned hw;
for (hw = 0; hw < pc->immd_nr * 4; hw++)
- if (pc->immd_buf[hw] == f)
+ if (pc->immd_buf[hw] == fui(f))
break;
if (hw == pc->immd_nr * 4)
- hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+ hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
ctor_reg(r, P_IMMD, -1, hw);
return r;
@@ -396,10 +445,19 @@ is_immd(struct nv50_program_exec *e)
return FALSE;
}
+static boolean
+is_join(struct nv50_program_exec *e)
+{
+ if (is_long(e) && (e->inst[1] & 3) == 2)
+ return TRUE;
+ return FALSE;
+}
+
static INLINE void
set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
struct nv50_program_exec *e)
{
+ assert(!is_immd(e));
set_long(pc, e);
e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
e->inst[1] |= (pred << 7) | (idx << 12);
@@ -434,29 +492,119 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
}
alloc_reg(pc, dst);
+ if (dst->hw > 63)
+ set_long(pc, e);
e->inst[0] |= (dst->hw << 2);
}
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- float f = pc->immd_buf[imm->hw];
- unsigned val = fui(imm->neg ? -f : f);
-
set_long(pc, e);
- /*XXX: can't be predicated - bits overlap.. catch cases where both
- * are required and avoid them. */
+ /* XXX: can't be predicated - bits overlap; cases where both
+ * are required should be avoided by using pc->allow32 */
set_pred(pc, 0, 0, e);
set_pred_wr(pc, 0, 0, e);
e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (val & 0x3f) << 16;
- e->inst[1] |= (val >> 6) << 2;
+ e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
+ e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
}
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
+{
+ assert(!(e->inst[0] & 0x0c000000));
+ assert(!(e->inst[1] & 0x00000004));
+
+ e->inst[0] |= (a->hw & 3) << 26;
+ e->inst[1] |= (a->hw >> 2) << 2;
+}
+
+static void
+emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, uint16_t src1_val)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xd0000000 | (src1_val << 9);
+ e->inst[1] = 0x20000000;
+ set_long(pc, e);
+ e->inst[0] |= dst->hw << 2;
+ if (src0) /* otherwise will add to $a0, which is always 0 */
+ set_addr(e, src0);
+
+ emit(pc, e);
+}
+
+static struct nv50_reg *
+alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
+{
+ struct nv50_reg *a_tgsi = NULL, *a = NULL;
+ int i;
+ uint8_t avail = ~pc->addr_alloc;
+
+ if (!ref) {
+ /* allocate for TGSI_FILE_ADDRESS */
+ while (avail) {
+ i = ffs(avail) - 1;
+
+ if (pc->r_addr[i].rhw < 0 ||
+ pc->r_addr[i].acc != pc->insn_cur) {
+ pc->addr_alloc |= (1 << i);
+
+ pc->r_addr[i].rhw = -1;
+ pc->r_addr[i].index = i;
+ return &pc->r_addr[i];
+ }
+ avail &= ~(1 << i);
+ }
+ assert(0);
+ return NULL;
+ }
+
+ /* Allocate and set an address reg so we can access 'ref'.
+ *
+ * If and r_addr->index will be -1 or the hw index the value
+ * value in rhw is relative to. If rhw < 0, the reg has not
+ * been initialized or is in use for TGSI_FILE_ADDRESS.
+ */
+ while (avail) { /* only consider regs that are not TGSI */
+ i = ffs(avail) - 1;
+ avail &= ~(1 << i);
+
+ if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
+ /* prefer an usused reg with low hw index */
+ a = &pc->r_addr[i];
+ continue;
+ }
+ if (!a && pc->r_addr[i].acc != pc->insn_cur)
+ a = &pc->r_addr[i];
+
+ if (ref->hw - pc->r_addr[i].rhw >= 128)
+ continue;
+
+ if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
+ (ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
+ pc->r_addr[i].acc = pc->insn_cur;
+ return &pc->r_addr[i];
+ }
+ }
+ assert(a);
+
+ if (ref->acc < 0)
+ a_tgsi = pc->addr[ref->index];
+
+ emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4);
+
+ a->rhw = ref->hw & ~0x7f;
+ a->acc = pc->insn_cur;
+ a->index = a_tgsi ? ref->index : -1;
+ return a;
+}
#define INTERP_LINEAR 0
-#define INTERP_FLAT 1
+#define INTERP_FLAT 1
#define INTERP_PERSPECTIVE 2
#define INTERP_CENTROID 4
@@ -494,23 +642,34 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
{
set_long(pc, e);
- e->param.index = src->hw;
+ e->param.index = src->hw & 127;
e->param.shift = s;
e->param.mask = m << (s % 32);
+ if (src->hw > 127)
+ set_addr(e, alloc_addr(pc, src));
+ else
+ if (src->acc < 0) {
+ assert(src->type == P_CONST);
+ set_addr(e, pc->addr[src->index]);
+ }
+
e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
+/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
static void
emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
struct nv50_program_exec *e = exec(pc);
- e->inst[0] |= 0x10000000;
+ e->inst[0] = 0x10000000;
+ if (!pc->allow32)
+ set_long(pc, e);
set_dst(pc, dst, e);
- if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) {
+ if (!is_long(e) && src->type == P_IMMD) {
set_immd(pc, src, e);
/*XXX: 32-bit, but steals part of "half" reg space - need to
* catch and handle this case if/when we do half-regs
@@ -519,7 +678,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
if (src->type == P_IMMD || src->type == P_CONST) {
set_long(pc, e);
set_data(pc, src, 0x7f, 9, e);
- e->inst[1] |= 0x20000000; /* src0 const? */
+ e->inst[1] |= 0x20000000; /* mov from c[] */
} else {
if (src->type == P_ATTR) {
set_long(pc, e);
@@ -527,14 +686,16 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
}
alloc_reg(pc, src);
+ if (src->hw > 63)
+ set_long(pc, e);
e->inst[0] |= (src->hw << 9);
}
if (is_long(e) && !is_immd(e)) {
e->inst[1] |= 0x04000000; /* 32-bit */
- e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+ e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
if (!(e->inst[1] & 0x20000000))
- e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+ e->inst[1] |= 0x00030000; /* lane mask 2:3 */
} else
e->inst[0] |= 0x00008000;
@@ -549,6 +710,45 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+/* Assign the hw of the discarded temporary register src
+ * to the tgsi register dst and free src.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ assert(src->index == -1 && src->hw != -1);
+
+ if (pc->if_lvl || pc->loop_lvl ||
+ (dst->type != P_TEMP) ||
+ (src->hw < pc->result_nr * 4 &&
+ pc->p->type == PIPE_SHADER_FRAGMENT) ||
+ pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
+ pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
+
+ emit_mov(pc, dst, src);
+ free_temp(pc, src);
+ return;
+ }
+
+ if (dst->hw != -1)
+ pc->r_temp[dst->hw] = NULL;
+ pc->r_temp[src->hw] = dst;
+ dst->hw = src->hw;
+
+ FREE(src);
+}
+
+static void
+emit_nop(struct nv50_pc *pc)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000;
+ set_long(pc, e);
+ e->inst[1] = 0xe0000000;
+ emit(pc, e);
+}
+
static boolean
check_swap_src_0_1(struct nv50_pc *pc,
struct nv50_reg **s0, struct nv50_reg **s1)
@@ -586,6 +786,8 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
}
alloc_reg(pc, src);
+ if (src->hw > 63)
+ set_long(pc, e);
e->inst[0] |= (src->hw << 9);
}
@@ -604,6 +806,8 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
alloc_reg(pc, src);
+ if (src->hw > 63)
+ set_long(pc, e);
e->inst[0] |= (src->hw << 9);
}
@@ -630,7 +834,9 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
alloc_reg(pc, src);
- e->inst[0] |= (src->hw << 16);
+ if (src->hw > 63)
+ set_long(pc, e);
+ e->inst[0] |= ((src->hw & 127) << 16);
}
static void
@@ -658,7 +864,34 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
alloc_reg(pc, src);
- e->inst[1] |= (src->hw << 14);
+ e->inst[1] |= ((src->hw & 127) << 14);
+}
+
+static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ assert(dst->type == P_TEMP);
+ e->inst[1] = 0x20000000 | (pred << 12);
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x000001fc;
+ e->inst[1] = 0xa0000008;
+ set_long(pc, e);
+ set_pred_wr(pc, 1, pred, e);
+ set_src_0_restricted(pc, src, e);
+
+ emit(pc, e);
}
static void
@@ -676,12 +909,12 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
if (src1->type == P_IMMD && !is_long(e)) {
- if (src0->neg)
+ if (src0->mod ^ src1->mod)
e->inst[0] |= 0x00008000;
set_immd(pc, src1, e);
} else {
set_src_1(pc, src1, e);
- if (src0->neg ^ src1->neg) {
+ if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) {
if (is_long(e))
e->inst[1] |= 0x08000000;
else
@@ -698,13 +931,15 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
{
struct nv50_program_exec *e = exec(pc);
- e->inst[0] |= 0xb0000000;
+ e->inst[0] = 0xb0000000;
+ alloc_reg(pc, src1);
check_swap_src_0_1(pc, &src0, &src1);
- if (!pc->allow32 || src0->neg || src1->neg) {
+ if (!pc->allow32 || (src0->mod | src1->mod) || src1->hw > 63) {
set_long(pc, e);
- e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+ e->inst[1] |= ((src0->mod & NV50_MOD_NEG) << 26) |
+ ((src1->mod & NV50_MOD_NEG) << 27);
}
set_dst(pc, dst, e);
@@ -721,20 +956,48 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
}
static void
+emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+ uint8_t s)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[1] |= 0xc0000000;
+
+ e->inst[0] |= dst->hw << 2;
+ e->inst[0] |= s << 16; /* shift left */
+ set_src_0_restricted(pc, src, e);
+
+ emit(pc, e);
+}
+
+#define NV50_MAX_F32 0x880
+#define NV50_MAX_S32 0x08c
+#define NV50_MAX_U32 0x084
+#define NV50_MIN_F32 0x8a0
+#define NV50_MIN_S32 0x0ac
+#define NV50_MIN_U32 0x0a4
+
+static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1)
{
struct nv50_program_exec *e = exec(pc);
set_long(pc, e);
- e->inst[0] |= 0xb0000000;
- e->inst[1] |= (sub << 29);
+ e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20);
+ e->inst[1] |= (sub << 24);
check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
set_src_1(pc, src1, e);
+ if (src0->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+ if (src1->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00080000;
+
emit(pc, e);
}
@@ -742,9 +1005,76 @@ static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- src1->neg ^= 1;
+ src1->mod ^= NV50_MOD_NEG;
emit_add(pc, dst, src0, src1);
- src1->neg ^= 1;
+ src1->mod ^= NV50_MOD_NEG;
+}
+
+static void
+emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+ struct nv50_reg *src1, unsigned op)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xd0000000;
+ set_long(pc, e);
+
+ check_swap_src_0_1(pc, &src0, &src1);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+
+ if (op != TGSI_OPCODE_AND && op != TGSI_OPCODE_OR &&
+ op != TGSI_OPCODE_XOR)
+ assert(!"invalid bit op");
+
+ assert(!(src0->mod | src1->mod));
+
+ if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
+ set_immd(pc, src1, e);
+ if (op == TGSI_OPCODE_OR)
+ e->inst[0] |= 0x0100;
+ else
+ if (op == TGSI_OPCODE_XOR)
+ e->inst[0] |= 0x8000;
+ } else {
+ set_src_1(pc, src1, e);
+ e->inst[1] |= 0x04000000; /* 32 bit */
+ if (op == TGSI_OPCODE_OR)
+ e->inst[1] |= 0x4000;
+ else
+ if (op == TGSI_OPCODE_XOR)
+ e->inst[1] |= 0x8000;
+ }
+
+ emit(pc, e);
+}
+
+static void
+emit_shift(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x30000000;
+ e->inst[1] = 0xc4000000;
+
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+
+ if (src1->type == P_IMMD) {
+ e->inst[1] |= (1 << 20);
+ e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16;
+ } else
+ set_src_1(pc, src1, e);
+
+ if (dir != TGSI_OPCODE_SHL)
+ e->inst[1] |= (1 << 29);
+
+ if (dir == TGSI_OPCODE_ISHR)
+ e->inst[1] |= (1 << 27);
+
+ emit(pc, e);
}
static void
@@ -761,9 +1091,9 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
set_src_1(pc, src1, e);
set_src_2(pc, src2, e);
- if (src0->neg ^ src1->neg)
+ if ((src0->mod ^ src1->mod) & NV50_MOD_NEG)
e->inst[1] |= 0x04000000;
- if (src2->neg)
+ if (src2->mod & NV50_MOD_NEG)
e->inst[1] |= 0x08000000;
emit(pc, e);
@@ -773,11 +1103,19 @@ static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- src2->neg ^= 1;
+ src2->mod ^= NV50_MOD_NEG;
emit_mad(pc, dst, src0, src1, src2);
- src2->neg ^= 1;
+ src2->mod ^= NV50_MOD_NEG;
}
+#define NV50_FLOP_RCP 0
+#define NV50_FLOP_RSQ 2
+#define NV50_FLOP_LG2 3
+#define NV50_FLOP_SIN 4
+#define NV50_FLOP_COS 5
+#define NV50_FLOP_EX2 6
+
+/* rcp, rsqrt, lg2 support neg and abs */
static void
emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_reg *dst, struct nv50_reg *src)
@@ -785,17 +1123,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub,
struct nv50_program_exec *e = exec(pc);
e->inst[0] |= 0x90000000;
- if (sub) {
+ if (sub || src->mod) {
set_long(pc, e);
e->inst[1] |= (sub << 29);
}
set_dst(pc, dst, e);
+ set_src_0_restricted(pc, src, e);
- if (sub == 0 || sub == 2)
- set_src_0_restricted(pc, src, e);
- else
- set_src_0(pc, src, e);
+ assert(!src->mod || sub < 4);
+
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
emit(pc, e);
}
@@ -812,6 +1153,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29) | 0x00004000;
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
@@ -827,40 +1173,49 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
set_long(pc, e);
e->inst[1] |= (6 << 29);
+ if (src->mod & NV50_MOD_NEG)
+ e->inst[1] |= 0x04000000;
+ if (src->mod & NV50_MOD_ABS)
+ e->inst[1] |= 0x00100000;
+
emit(pc, e);
}
-#define CVTOP_RN 0x01
-#define CVTOP_FLOOR 0x03
-#define CVTOP_CEIL 0x05
-#define CVTOP_TRUNC 0x07
-#define CVTOP_SAT 0x08
-#define CVTOP_ABS 0x10
-
-/* 0x04 == 32 bit */
-/* 0x40 == dst is float */
-/* 0x80 == src is float */
-#define CVT_F32_F32 0xc4
-#define CVT_F32_S32 0x44
-#define CVT_F32_U32 0x64
-#define CVT_S32_F32 0x8c
-#define CVT_S32_S32 0x0c
-#define CVT_NEG 0x20
-#define CVT_RI 0x08
+#define CVT_RN (0x00 << 16)
+#define CVT_FLOOR (0x02 << 16)
+#define CVT_CEIL (0x04 << 16)
+#define CVT_TRUNC (0x06 << 16)
+#define CVT_SAT (0x08 << 16)
+#define CVT_ABS (0x10 << 16)
+
+#define CVT_X32_X32 0x04004000
+#define CVT_X32_S32 0x04014000
+#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
+#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
+#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
+#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
+#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
+#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
+#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
+#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
+
+#define CVT_NEG 0x20000000
+#define CVT_RI 0x08000000
static void
emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- int wp, unsigned cvn, unsigned fmt)
+ int wp, uint32_t cvn)
{
struct nv50_program_exec *e;
e = exec(pc);
- set_long(pc, e);
- e->inst[0] |= 0xa0000000;
- e->inst[1] |= 0x00004000;
- e->inst[1] |= (cvn << 16);
- e->inst[1] |= (fmt << 24);
+ if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG;
+ if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS;
+
+ e->inst[0] = 0xa0000000;
+ e->inst[1] = cvn;
+ set_long(pc, e);
set_src_0(pc, src, e);
if (wp >= 0)
@@ -885,10 +1240,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
* 0x6 = GE
* 0x7 = set condition code ? (used before bra.lt/le/gt/ge)
* 0x8 = unordered bit (allows NaN)
+ *
+ * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32)
*/
static void
emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
- struct nv50_reg *src0, struct nv50_reg *src1)
+ struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode)
{
static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
@@ -903,16 +1260,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
if (dst && dst->type != P_TEMP)
dst = alloc_temp(pc, NULL);
- /* set.u32 */
set_long(pc, e);
- e->inst[0] |= 0xb0000000;
+ e->inst[0] |= 0x30000000 | (mode << 24);
e->inst[1] |= 0x60000000 | (ccode << 14);
- /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but
- * that doesn't seem to match what the hw actually does
- e->inst[1] |= 0x04000000; << breaks things, u32 by default ?
- */
-
if (wp >= 0)
set_pred_wr(pc, 1, wp, e);
if (dst)
@@ -926,35 +1277,108 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
set_src_1(pc, src1, e);
emit(pc, e);
- pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */
- /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
- if (rdst)
- emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32);
+ if (rdst && mode == 0x80) /* convert to float ? */
+ emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32);
if (rdst && rdst != dst)
free_temp(pc, dst);
}
-static INLINE unsigned
-map_tgsi_setop_cc(unsigned op)
+static INLINE void
+map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty)
{
switch (op) {
- case TGSI_OPCODE_SLT: return 0x1;
- case TGSI_OPCODE_SGE: return 0x6;
- case TGSI_OPCODE_SEQ: return 0x2;
- case TGSI_OPCODE_SGT: return 0x4;
- case TGSI_OPCODE_SLE: return 0x3;
- case TGSI_OPCODE_SNE: return 0xd;
+ case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break;
+ case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break;
+ case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break;
+ case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break;
+ case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break;
+ case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break;
+
+ case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break;
+ case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break;
+ case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break;
+ case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break;
+ case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break;
+ case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break;
default:
assert(0);
- return 0;
+ return;
}
}
+static void
+emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *rsrc1)
+{
+ struct nv50_program_exec *e = exec(pc);
+ struct nv50_reg *src1;
+
+ e->inst[0] = 0x20000000;
+
+ alloc_reg(pc, rsrc1);
+ check_swap_src_0_1(pc, &src0, &rsrc1);
+
+ src1 = rsrc1;
+ if (src0->mod & rsrc1->mod & NV50_MOD_NEG) {
+ src1 = alloc_temp(pc, NULL);
+ emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32);
+ }
+
+ if (!pc->allow32 || src1->hw > 63 ||
+ (src1->type != P_TEMP && src1->type != P_IMMD))
+ set_long(pc, e);
+
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+
+ if (is_long(e)) {
+ e->inst[1] |= 1 << 26;
+ set_src_2(pc, src1, e);
+ } else {
+ e->inst[0] |= 0x8000;
+ if (src1->type == P_IMMD)
+ set_immd(pc, src1, e);
+ else
+ set_src_1(pc, src1, e);
+ }
+
+ if (src0->mod & NV50_MOD_NEG)
+ e->inst[0] |= 1 << 28;
+ else
+ if (src1->mod & NV50_MOD_NEG)
+ e->inst[0] |= 1 << 22;
+
+ emit(pc, e);
+
+ if (src1 != rsrc1)
+ free_temp(pc, src1);
+}
+
+static void
+emit_sad(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x50000000;
+ set_dst(pc, dst, e);
+ set_src_0(pc, src0, e);
+ set_src_1(pc, src1, e);
+ alloc_reg(pc, src2);
+ if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw))
+ set_src_2(pc, src2, e);
+
+ if (is_long(e))
+ e->inst[1] |= 0x0c << 24;
+ else
+ e->inst[0] |= 0x81 << 8;
+}
+
static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI);
+ emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI);
}
static void
@@ -963,24 +1387,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
{
struct nv50_reg *temp = alloc_temp(pc, NULL);
- emit_flop(pc, 3, temp, v);
+ emit_flop(pc, NV50_FLOP_LG2, temp, v);
emit_mul(pc, temp, temp, e);
emit_preex2(pc, temp, temp);
- emit_flop(pc, 6, dst, temp);
+ emit_flop(pc, NV50_FLOP_EX2, dst, temp);
free_temp(pc, temp);
}
static INLINE void
-emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
-}
-
-static INLINE void
emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32);
+ emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32);
}
static void
@@ -998,18 +1416,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
if (mask & (3 << 1)) {
tmp[0] = alloc_temp(pc, NULL);
- emit_minmax(pc, 4, tmp[0], src[0], zero);
+ emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero);
}
if (mask & (1 << 2)) {
set_pred_wr(pc, 1, 0, pc->p->exec_tail);
tmp[1] = temp_temp(pc);
- emit_minmax(pc, 4, tmp[1], src[1], zero);
+ emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero);
tmp[3] = temp_temp(pc);
- emit_minmax(pc, 4, tmp[3], src[3], neg128);
- emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+ emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128);
+ emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128);
emit_pow(pc, dst[2], tmp[1], tmp[3]);
emit_mov(pc, dst[2], zero);
@@ -1038,132 +1456,356 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
-emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e;
+ const int r_pred = 1;
+
+ e = exec(pc);
+ e->inst[0] = 0x00000002; /* discard */
+ set_long(pc, e); /* sets cond code to ALWAYS */
+
+ if (src) {
+ set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
+ /* write to predicate reg */
+ emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32);
+ }
+
+ emit(pc, e);
+}
+
+static struct nv50_program_exec *
+emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc)
{
struct nv50_program_exec *e = exec(pc);
+ e->inst[0] = (op << 28) | 2;
set_long(pc, e);
- e->inst[0] |= 0xa0000000; /* delta */
- e->inst[1] |= (7 << 29); /* delta */
- e->inst[1] |= 0x04000000; /* negate arg0? probably not */
- e->inst[1] |= (1 << 14); /* src .f32 */
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
+ if (pred >= 0)
+ set_pred(pc, cc, pred, e);
emit(pc, e);
+ return e;
+}
+
+static INLINE struct nv50_program_exec *
+emit_breakaddr(struct nv50_pc *pc)
+{
+ return emit_control_flow(pc, 0x4, -1, 0);
+}
+
+static INLINE void
+emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ emit_control_flow(pc, 0x5, pred, cc);
+}
+
+static INLINE struct nv50_program_exec *
+emit_joinat(struct nv50_pc *pc)
+{
+ return emit_control_flow(pc, 0xa, -1, 0);
}
+static INLINE struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ return emit_control_flow(pc, 0x1, pred, cc);
+}
+
+static INLINE struct nv50_program_exec *
+emit_call(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ return emit_control_flow(pc, 0x2, pred, cc);
+}
+
+static INLINE void
+emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
+{
+ emit_control_flow(pc, 0x3, pred, cc);
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
static void
-emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+ struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
{
- struct nv50_program_exec *e;
- const int r_pred = 1;
+ struct nv50_program_exec *e = exec(pc);
- /* Sets predicate reg ? */
- e = exec(pc);
- e->inst[0] = 0xa00001fd;
- e->inst[1] = 0xc4014788;
- set_src_0(pc, src, e);
- set_pred_wr(pc, 1, r_pred, e);
- if (src->neg)
- e->inst[1] |= 0x20000000;
- emit(pc, e);
+ e->inst[0] = 0xc0000000;
+ e->inst[1] = 0x80000000;
+ set_long(pc, e);
+ e->inst[0] |= lane_src0 << 16;
+ set_src_0(pc, src0, e);
+ set_src_2(pc, src1, e);
- /* This is probably KILP */
- e = exec(pc);
- e->inst[0] = 0x000001fe;
- set_long(pc, e);
- set_pred(pc, 1 /* LT? */, r_pred, e);
- emit(pc, e);
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
+ e->inst[0] |= (qop & 3) << 20;
+ e->inst[1] |= (qop >> 2) << 22;
+
+ emit(pc, e);
}
static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
- struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+ struct nv50_reg **src, unsigned arg, boolean proj)
{
- struct nv50_reg *temp, *t[4];
- struct nv50_program_exec *e;
+ int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
+
+ src[0]->mod |= NV50_MOD_ABS;
+ src[1]->mod |= NV50_MOD_ABS;
+ src[2]->mod |= NV50_MOD_ABS;
+
+ emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]);
+ emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]);
+
+ src[0]->mod = mod[0];
+ src[1]->mod = mod[1];
+ src[2]->mod = mod[2];
+
+ if (proj && 0 /* looks more correct without this */)
+ emit_mul(pc, t[2], t[2], src[3]);
+ else
+ if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+ emit_mov(pc, t[3], src[3]);
+
+ emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]);
+
+ emit_mul(pc, t[0], src[0], t[2]);
+ emit_mul(pc, t[1], src[1], t[2]);
+ emit_mul(pc, t[2], src[2], t[2]);
+}
+
+static void
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+ struct nv50_reg **src, unsigned dim, unsigned arg)
+{
+ unsigned c, mode;
+
+ if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+ mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
+
+ t[3]->rhw = src[3]->rhw;
+ emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+ emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]);
- unsigned c, mode, dim;
+ for (c = 0; c < dim; ++c) {
+ t[c]->rhw = src[c]->rhw;
+ emit_interp(pc, t[c], t[3], mode);
+ }
+ if (arg != dim) { /* depth reference value */
+ t[dim]->rhw = src[2]->rhw;
+ emit_interp(pc, t[dim], t[3], mode);
+ }
+ } else {
+ /* XXX: for some reason the blob sometimes uses MAD
+ * (mad f32 $rX $rY $rZ neg $r63)
+ */
+ emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]);
+ for (c = 0; c < dim; ++c)
+ emit_mul(pc, t[c], src[c], t[3]);
+ if (arg != dim) /* depth reference value */
+ emit_mul(pc, t[dim], src[2], t[3]);
+ }
+}
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
switch (type) {
case TGSI_TEXTURE_1D:
- dim = 1;
+ *arg = *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = 1;
+ *arg = 2;
break;
case TGSI_TEXTURE_UNKNOWN:
case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
case TGSI_TEXTURE_RECT:
- dim = 2;
+ *arg = *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *arg = 3;
break;
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT: /* XXX */
- dim = 3;
+ *dim = *arg = 3;
break;
default:
assert(0);
break;
}
+}
- /* some cards need t[0]'s hw index to be a multiple of 4 */
- alloc_temp4(pc, t, 0);
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD.
+ */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+ struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *join_at;
+ unsigned i, target = pc->p->exec_size + 9 * 2;
- if (proj) {
- if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
- mode = pc->interp_mode[src[0]->index];
+ if (pc->p->type != PIPE_SHADER_FRAGMENT) {
+ emit(pc, tex);
+ return;
+ }
+ pc->allow32 = FALSE;
- t[3]->rhw = src[3]->rhw;
- emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, 0, t[3], t[3]);
+ /* Subtract lod of each pixel from lod of top left pixel, jump
+ * texlod insn if result is 0, then repeat for 2 other pixels.
+ */
+ join_at = emit_joinat(pc);
+ emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2)->param.index = target;
- for (c = 0; c < dim; c++) {
- t[c]->rhw = src[c]->rhw;
- emit_interp(pc, t[c], t[3],
- (mode | INTERP_PERSPECTIVE));
- }
- } else {
- emit_flop(pc, 0, t[3], src[3]);
- for (c = 0; c < dim; c++)
- emit_mul(pc, t[c], src[c], t[3]);
+ for (i = 1; i < 4; ++i) {
+ emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+ emit_branch(pc, 0, 2)->param.index = target;
+ }
- /* XXX: for some reason the blob sometimes uses MAD:
- * emit_mad(pc, t[c], src[0][c], t[3], t[3])
- * pc->p->exec_tail->inst[1] |= 0x080fc000;
- */
- }
- } else {
- if (type == TGSI_TEXTURE_CUBE) {
- temp = temp_temp(pc);
- emit_minmax(pc, 4, temp, src[0], src[1]);
- emit_minmax(pc, 4, temp, temp, src[2]);
- emit_flop(pc, 0, temp, temp);
- for (c = 0; c < 3; c++)
- emit_mul(pc, t[c], src[c], temp);
- } else {
- for (c = 0; c < dim; c++)
- emit_mov(pc, t[c], src[c]);
+ emit_mov(pc, tlod, src); /* target */
+ emit(pc, tex); /* texlod */
+
+ join_at->param.index = target + 2 * 2;
+ JOIN_ON(emit_nop(pc)); /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+ struct nv50_program_exec *tex)
+{
+ struct nv50_program_exec *e;
+ struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+ int r_pred = 0;
+ unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+ pc->allow32 = FALSE;
+ ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+ /* Subtract bias value of thread i from bias values of each thread,
+ * store result in r_pred, and set bit i in r_bits if result was 0.
+ */
+ assert(arg < 4);
+ for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+ emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+ emit_mov(pc, r_bits, &imm_1248);
+ set_pred(pc, 2, r_pred, pc->p->exec_tail);
+ }
+ emit_mov_to_pred(pc, r_pred, r_bits);
+
+ /* The lanes of a quad are now grouped by the bit in r_pred they have
+ * set. Put the input values for TEX into a new register set for each
+ * group and execute TEX only for a specific group.
+ * We cannot use the same register set for each group because we need
+ * the derivatives, which are implicitly calculated, to be correct.
+ */
+ for (i = 1; i < 4; ++i) {
+ alloc_temp4(pc, t123[i], 0);
+
+ for (c = 0; c <= arg; ++c)
+ emit_mov(pc, t123[i][c], t[c]);
+
+ *(e = exec(pc)) = *(tex);
+ e->inst[0] &= ~0x01fc;
+ set_dst(pc, t123[i][0], e);
+ set_pred(pc, cc[i], r_pred, e);
+ emit(pc, e);
+ }
+ /* finally TEX on the original regs (where we kept the input) */
+ set_pred(pc, cc[0], r_pred, tex);
+ emit(pc, tex);
+
+ /* put the 3 * n other results into regs for lane 0 */
+ n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+ for (i = 1; i < 4; ++i) {
+ for (c = 0; c < n; ++c) {
+ emit_mov(pc, t[c], t123[i][c]);
+ set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
}
+ free_temp4(pc, t123[i]);
}
+ emit_nop(pc);
+ free_temp(pc, r_bits);
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src, unsigned unit, unsigned type,
+ boolean proj, int bias_lod)
+{
+ struct nv50_reg *t[4];
+ struct nv50_program_exec *e;
+ unsigned c, dim, arg;
+
+ /* t[i] must be within a single 128 bit super-reg */
+ alloc_temp4(pc, t, 0);
+
e = exec(pc);
+ e->inst[0] = 0xf0000000;
set_long(pc, e);
- e->inst[0] |= 0xf0000000;
- e->inst[1] |= 0x00000004;
set_dst(pc, t[0], e);
- e->inst[0] |= (unit << 9);
- if (dim == 2)
- e->inst[0] |= 0x00400000;
- else
- if (dim == 3)
- e->inst[0] |= 0x00800000;
+ /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+ e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+ /* live flag (don't set if TEX results affect input to another TEX): */
+ /* e->inst[0] |= 0x00000004; */
+
+ get_tex_dim(type, &dim, &arg);
+
+ if (type == TGSI_TEXTURE_CUBE) {
+ e->inst[0] |= 0x08000000;
+ load_cube_tex_coords(pc, t, src, arg, proj);
+ } else
+ if (proj)
+ load_proj_tex_coords(pc, t, src, dim, arg);
+ else {
+ for (c = 0; c < dim; c++)
+ emit_mov(pc, t[c], src[c]);
+ if (arg != dim) /* depth reference value (always src.z here) */
+ emit_mov(pc, t[dim], src[2]);
+ }
e->inst[0] |= (mask & 0x3) << 25;
e->inst[1] |= (mask & 0xc) << 12;
- emit(pc, e);
+ if (!bias_lod) {
+ e->inst[0] |= (arg - 1) << 22;
+ emit(pc, e);
+ } else
+ if (bias_lod < 0) {
+ assert(pc->p->type == PIPE_SHADER_FRAGMENT);
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x20000000; /* texbias */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texbias_sequence(pc, t, arg, e);
+ } else {
+ e->inst[0] |= arg << 22;
+ e->inst[1] |= 0x40000000; /* texlod */
+ emit_mov(pc, t[arg], src[3]);
+ emit_texlod_sequence(pc, t[arg], src[3], e);
+ }
#if 1
c = 0;
@@ -1187,34 +1829,36 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
- struct nv50_program_exec **join)
+emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
struct nv50_program_exec *e = exec(pc);
- if (join) {
- set_long(pc, e);
- e->inst[0] |= 0xa0000002;
- emit(pc, e);
- *join = e;
- e = exec(pc);
- }
+ assert(src->type == P_TEMP);
+ e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0240000 : 0xc0140000;
+ e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x86400000 : 0x89800000;
set_long(pc, e);
- e->inst[0] |= 0x10000002;
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_src_2(pc, src, e);
+
emit(pc, e);
}
static void
-emit_nop(struct nv50_pc *pc)
+emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
struct nv50_program_exec *e = exec(pc);
- e->inst[0] = 0xf0000000;
+ assert(src->type == P_TEMP);
+
+ e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0250000 : 0xc0150000;
+ e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x85800000 : 0x8a400000;
set_long(pc, e);
- e->inst[1] = 0xe0000000;
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+ set_src_2(pc, src, e);
+
emit(pc, e);
}
@@ -1231,6 +1875,17 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
q = 0x0403c000;
m = 0xffff7fff;
break;
+ case 0x2:
+ case 0x3:
+ /* ADD, SUB, SUBR b32 */
+ m = ~(0x8000 | (127 << 16));
+ q = ((e->inst[0] & (~m)) >> 2) | (1 << 26);
+ break;
+ case 0x5:
+ /* SAD */
+ m = ~(0x81 << 8);
+ q = 0x0c << 24;
+ break;
case 0x8:
/* INTERP (move centroid, perspective and flat bits) */
m = ~0x03000100;
@@ -1266,22 +1921,55 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
e->inst[1] |= q;
}
-static boolean
-negate_supported(const struct tgsi_full_instruction *insn, int i)
+/* Some operations support an optional negation flag. */
+static int
+get_supported_mods(const struct tgsi_full_instruction *insn, int i)
{
switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_EX2:
case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_LG2:
case TGSI_OPCODE_MAD:
- return TRUE;
+ case TGSI_OPCODE_MUL:
case TGSI_OPCODE_POW:
- return (i == 1) ? TRUE : FALSE;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */
+ case TGSI_OPCODE_SCS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_SUB:
+ return NV50_MOD_NEG;
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */
+ return NV50_MOD_ABS;
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ return NV50_MOD_NEG | NV50_MOD_ABS;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F:
+ return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
+ case TGSI_OPCODE_UADD:
+ return NV50_MOD_NEG | NV50_MOD_I32;
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USHR:
+ return NV50_MOD_I32;
default:
- return FALSE;
+ return 0;
}
}
@@ -1289,7 +1977,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
static unsigned
nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
{
- unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask;
+ unsigned x, mask = insn->Dst[0].Register.WriteMask;
switch (insn->Instruction.Opcode) {
case TGSI_OPCODE_COS:
@@ -1304,30 +1992,40 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
case TGSI_OPCODE_DST:
return mask & (c ? 0xa : 0x6);
case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_POW:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
case TGSI_OPCODE_SCS:
return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
case TGSI_OPCODE_LIT:
return 0xb;
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
{
- const struct tgsi_instruction_ext_texture *tex;
+ const struct tgsi_instruction_texture *tex;
- assert(insn->Instruction.Extended);
- tex = &insn->InstructionExtTexture;
+ assert(insn->Instruction.Texture);
+ tex = &insn->Texture;
mask = 0x7;
- if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
- mask |= 0x8;
+ if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
switch (tex->Texture) {
case TGSI_TEXTURE_1D:
mask &= 0x9;
break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
case TGSI_TEXTURE_2D:
mask &= 0xb;
break;
@@ -1352,11 +2050,21 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
- switch (dst->DstRegister.File) {
+ switch (dst->Register.File) {
case TGSI_FILE_TEMPORARY:
- return &pc->temp[dst->DstRegister.Index * 4 + c];
+ return &pc->temp[dst->Register.Index * 4 + c];
case TGSI_FILE_OUTPUT:
- return &pc->result[dst->DstRegister.Index * 4 + c];
+ return &pc->result[dst->Register.Index * 4 + c];
+ case TGSI_FILE_ADDRESS:
+ {
+ struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
+ if (!r) {
+ r = alloc_addr(pc, NULL);
+ pc->addr[dst->Register.Index * 4 + c] = r;
+ }
+ assert(r);
+ return r;
+ }
case TGSI_FILE_NULL:
return NULL;
default:
@@ -1368,83 +2076,93 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
static struct nv50_reg *
tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
- boolean neg)
+ int mod)
{
struct nv50_reg *r = NULL;
- struct nv50_reg *temp;
- unsigned sgn, c;
+ struct nv50_reg *temp = NULL;
+ unsigned sgn, c, swz, cvn;
+
+ if (src->Register.File != TGSI_FILE_CONSTANT)
+ assert(!src->Register.Indirect);
sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
- c = tgsi_util_get_full_src_register_extswizzle(src, chan);
+ c = tgsi_util_get_full_src_register_swizzle(src, chan);
switch (c) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch (src->SrcRegister.File) {
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
+ switch (src->Register.File) {
case TGSI_FILE_INPUT:
- r = &pc->attr[src->SrcRegister.Index * 4 + c];
+ r = &pc->attr[src->Register.Index * 4 + c];
break;
case TGSI_FILE_TEMPORARY:
- r = &pc->temp[src->SrcRegister.Index * 4 + c];
+ r = &pc->temp[src->Register.Index * 4 + c];
break;
case TGSI_FILE_CONSTANT:
- r = &pc->param[src->SrcRegister.Index * 4 + c];
+ if (!src->Register.Indirect) {
+ r = &pc->param[src->Register.Index * 4 + c];
+ break;
+ }
+ /* Indicate indirection by setting r->acc < 0 and
+ * use the index field to select the address reg.
+ */
+ r = reg_instance(pc, NULL);
+ swz = tgsi_util_get_src_register_swizzle(
+ &src->Indirect, 0);
+ ctor_reg(r, P_CONST,
+ src->Indirect.Index * 4 + swz,
+ src->Register.Index * 4 + c);
+ r->acc = -1;
break;
case TGSI_FILE_IMMEDIATE:
- r = &pc->immd[src->SrcRegister.Index * 4 + c];
+ r = &pc->immd[src->Register.Index * 4 + c];
break;
case TGSI_FILE_SAMPLER:
+ return NULL;
+ case TGSI_FILE_ADDRESS:
+ r = pc->addr[src->Register.Index * 4 + c];
+ assert(r);
break;
default:
assert(0);
break;
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- r = alloc_immd(pc, 0.0);
- return r;
- case TGSI_EXTSWIZZLE_ONE:
- if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET)
- return alloc_immd(pc, -1.0);
- return alloc_immd(pc, 1.0);
default:
assert(0);
break;
}
+ cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32;
+
switch (sgn) {
- case TGSI_UTIL_SIGN_KEEP:
- break;
case TGSI_UTIL_SIGN_CLEAR:
- temp = temp_temp(pc);
- emit_abs(pc, temp, r);
- r = temp;
- break;
- case TGSI_UTIL_SIGN_TOGGLE:
- if (neg)
- r->neg = 1;
- else {
- temp = temp_temp(pc);
- emit_neg(pc, temp, r);
- r = temp;
- }
+ r->mod = NV50_MOD_ABS;
break;
case TGSI_UTIL_SIGN_SET:
- temp = temp_temp(pc);
- emit_abs(pc, temp, r);
- if (neg)
- temp->neg = 1;
- else
- emit_neg(pc, temp, temp);
- r = temp;
+ r->mod = NV50_MOD_NEG_ABS;
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ r->mod = NV50_MOD_NEG;
break;
default:
- assert(0);
+ assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP);
break;
}
+ if ((r->mod & mod) != r->mod) {
+ temp = temp_temp(pc);
+ emit_cvt(pc, temp, r, -1, cvn);
+ r->mod = 0;
+ r = temp;
+ } else
+ r->mod |= mod & NV50_MOD_I32;
+
+ assert(r);
+ if (r->acc >= 0 && r != temp)
+ return reg_instance(pc, r); /* will clear r->mod */
return r;
}
@@ -1497,9 +2215,13 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
assert(0);
return 0x0;
}
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
case TGSI_OPCODE_LIT:
case TGSI_OPCODE_SCS:
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
/* these take care of dangerous swizzles themselves */
return 0x0;
@@ -1535,34 +2257,51 @@ nv50_kill_branch(struct nv50_pc *pc)
if (pc->if_insn[lvl]->next != pc->p->exec_tail)
return FALSE;
+ if (is_immd(pc->p->exec_tail))
+ return FALSE;
/* if ccode == 'true', the BRA is from an ELSE and the predicate
* reg may no longer be valid, since we currently always use $p0
*/
if (has_pred(pc->if_insn[lvl], 0xf))
return FALSE;
- assert(pc->if_insn[lvl] && pc->br_join[lvl]);
+ assert(pc->if_insn[lvl] && pc->if_join[lvl]);
- /* We'll use the exec allocated for JOIN_AT (as we can't easily
- * update prev's next); if exec_tail is BRK, update the pointer.
+ /* We'll use the exec allocated for JOIN_AT (we can't easily
+ * access nv50_program_exec's prev).
*/
- if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail)
- pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl];
-
pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */
- *pc->br_join[lvl] = *pc->p->exec_tail;
+ *pc->if_join[lvl] = *pc->p->exec_tail;
FREE(pc->if_insn[lvl]);
FREE(pc->p->exec_tail);
- pc->p->exec_tail = pc->br_join[lvl];
+ pc->p->exec_tail = pc->if_join[lvl];
pc->p->exec_tail->next = NULL;
set_pred(pc, 0xd, 0, pc->p->exec_tail);
return TRUE;
}
+static void
+nv50_fp_move_results(struct nv50_pc *pc)
+{
+ struct nv50_reg reg;
+ unsigned i;
+
+ ctor_reg(&reg, P_TEMP, -1, -1);
+
+ for (i = 0; i < pc->result_nr * 4; ++i) {
+ if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
+ continue;
+ if (pc->result[i].rhw != pc->result[i].hw) {
+ reg.hw = pc->result[i].rhw;
+ emit_mov(pc, &reg, &pc->result[i]);
+ }
+ }
+}
+
static boolean
nv50_program_tx_insn(struct nv50_pc *pc,
const struct tgsi_full_instruction *inst)
@@ -1571,33 +2310,33 @@ nv50_program_tx_insn(struct nv50_pc *pc,
unsigned mask, sat, unit;
int i, c;
- mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ mask = inst->Dst[0].Register.WriteMask;
sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
memset(src, 0, sizeof(src));
for (c = 0; c < 4; c++) {
if ((mask & (1 << c)) && !pc->r_dst[c])
- dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
+ dst[c] = tgsi_dst(pc, c, &inst->Dst[0]);
else
dst[c] = pc->r_dst[c];
rdst[c] = dst[c];
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+ const struct tgsi_full_src_register *fs = &inst->Src[i];
unsigned src_mask;
- boolean neg_supp;
+ int mod_supp;
src_mask = nv50_tgsi_src_mask(inst, i);
- neg_supp = negate_supported(inst, i);
+ mod_supp = get_supported_mods(inst, i);
- if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
- unit = fs->SrcRegister.Index;
+ if (fs->Register.File == TGSI_FILE_SAMPLER)
+ unit = fs->Register.Index;
for (c = 0; c < 4; c++)
if (src_mask & (1 << c))
- src[i][c] = tgsi_src(pc, c, fs, neg_supp);
+ src[i][c] = tgsi_src(pc, c, fs, mod_supp);
}
brdc = temp = pc->r_brdc;
@@ -1610,7 +2349,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)
continue;
- rdst[c] = dst[c];
+ /* rdst[c] = dst[c]; */ /* done above */
dst[c] = temp_temp(pc);
}
}
@@ -1622,7 +2361,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_abs(pc, dst[c], src[0][c]);
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_ABS | CVT_F32_F32);
}
break;
case TGSI_OPCODE_ADD:
@@ -1632,33 +2372,93 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_add(pc, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_OR:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_bitop2(pc, dst[c], src[0][c], src[1][c],
+ inst->Instruction.Opcode);
+ }
+ break;
+ case TGSI_OPCODE_ARL:
+ assert(src[0][0]);
+ temp = temp_temp(pc);
+ emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
+ emit_arl(pc, dst[0], temp, 4);
+ break;
case TGSI_OPCODE_BGNLOOP:
+ pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+ terminate_mbb(pc);
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ assert(!pc->in_subroutine);
+ pc->in_subroutine = TRUE;
+ /* probably not necessary, but align to 8 byte boundary */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
break;
case TGSI_OPCODE_BRK:
- emit_branch(pc, -1, 0, NULL);
assert(pc->loop_lvl > 0);
- pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail;
+ emit_break(pc, -1, 0);
+ break;
+ case TGSI_OPCODE_CAL:
+ assert(inst->Label.Label < pc->insn_nr);
+ emit_call(pc, -1, 0)->param.index = inst->Label.Label;
+ /* replaced by actual offset in nv50_program_fixup_insns */
break;
case TGSI_OPCODE_CEIL:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
emit_cvt(pc, dst[c], src[0][c], -1,
- CVTOP_CEIL, CVT_F32_F32 | CVT_RI);
+ CVT_CEIL | CVT_F32_F32 | CVT_RI);
}
break;
+ case TGSI_OPCODE_CMP:
+ pc->allow32 = FALSE;
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32);
+ emit_mov(pc, dst[c], src[1][c]);
+ set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */
+ emit_mov(pc, dst[c], src[2][c]);
+ set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
+ }
+ break;
+ case TGSI_OPCODE_CONT:
+ assert(pc->loop_lvl > 0);
+ emit_branch(pc, -1, 0)->param.index =
+ pc->loop_pos[pc->loop_lvl - 1];
+ break;
case TGSI_OPCODE_COS:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 5, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
temp = brdc = temp_temp(pc);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 5, brdc, temp);
+ emit_flop(pc, NV50_FLOP_COS, brdc, temp);
+ break;
+ case TGSI_OPCODE_DDX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_ddx(pc, dst[c], src[0][c]);
+ }
+ break;
+ case TGSI_OPCODE_DDY:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_ddy(pc, dst[c], src[0][c]);
+ }
break;
case TGSI_OPCODE_DP3:
emit_mul(pc, temp, src[0][0], src[1][0]);
@@ -1688,9 +2488,10 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mov_immdval(pc, dst[0], 1.0f);
break;
case TGSI_OPCODE_ELSE:
- emit_branch(pc, -1, 0, NULL);
+ emit_branch(pc, -1, 0);
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ terminate_mbb(pc);
break;
case TGSI_OPCODE_ENDIF:
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -1699,24 +2500,72 @@ nv50_program_tx_insn(struct nv50_pc *pc,
if (nv50_kill_branch(pc) == TRUE)
break;
- if (pc->br_join[pc->if_lvl]) {
- pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
- pc->br_join[pc->if_lvl] = NULL;
+ if (pc->if_join[pc->if_lvl]) {
+ pc->if_join[pc->if_lvl]->param.index = pc->p->exec_size;
+ pc->if_join[pc->if_lvl] = NULL;
}
+ terminate_mbb(pc);
/* emit a NOP as join point, we could set it on the next
* one, but would have to make sure it is long and !immd
*/
- emit_nop(pc);
- pc->p->exec_tail->inst[1] |= 2;
+ JOIN_ON(emit_nop(pc));
break;
case TGSI_OPCODE_ENDLOOP:
- emit_branch(pc, -1, 0, NULL);
- pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
- pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+ emit_branch(pc, -1, 0)->param.index =
+ pc->loop_pos[--pc->loop_lvl];
+ pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
+ terminate_mbb(pc);
+ break;
+ case TGSI_OPCODE_ENDSUB:
+ assert(pc->in_subroutine);
+ pc->in_subroutine = FALSE;
break;
case TGSI_OPCODE_EX2:
emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, 6, brdc, temp);
+ emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
+ break;
+ case TGSI_OPCODE_EXP:
+ {
+ struct nv50_reg *t[2];
+
+ assert(!temp);
+ t[0] = temp_temp(pc);
+ t[1] = temp_temp(pc);
+
+ if (mask & 0x6)
+ emit_mov(pc, t[0], src[0][0]);
+ if (mask & 0x3)
+ emit_flr(pc, t[1], src[0][0]);
+
+ if (mask & (1 << 1))
+ emit_sub(pc, dst[1], t[0], t[1]);
+ if (mask & (1 << 0)) {
+ emit_preex2(pc, t[1], t[1]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]);
+ }
+ if (mask & (1 << 2)) {
+ emit_preex2(pc, t[0], t[0]);
+ emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
+ break;
+ case TGSI_OPCODE_F2I:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_TRUNC | CVT_S32_F32);
+ }
+ break;
+ case TGSI_OPCODE_F2U:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_TRUNC | CVT_U32_F32);
+ }
break;
case TGSI_OPCODE_FLR:
for (c = 0; c < 4; c++) {
@@ -1734,24 +2583,85 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_sub(pc, dst[c], src[0][c], temp);
}
break;
+ case TGSI_OPCODE_I2F:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32);
+ }
+ break;
case TGSI_OPCODE_IF:
- /* emitting a join_at may not be necessary */
- assert(pc->if_lvl < MAX_IF_DEPTH);
- set_pred_wr(pc, 1, 0, pc->if_cond);
- emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
- pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ assert(pc->if_lvl < NV50_MAX_COND_NESTING);
+ emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32);
+ pc->if_join[pc->if_lvl] = emit_joinat(pc);
+ pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
+ terminate_mbb(pc);
+ break;
+ case TGSI_OPCODE_IMAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_IMIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_INEG:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1,
+ CVT_S32_S32 | CVT_NEG);
+ }
break;
case TGSI_OPCODE_KIL:
+ assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
emit_kil(pc, src[0][0]);
emit_kil(pc, src[0][1]);
emit_kil(pc, src[0][2]);
emit_kil(pc, src[0][3]);
break;
+ case TGSI_OPCODE_KILP:
+ emit_kil(pc, NULL);
+ break;
case TGSI_OPCODE_LIT:
emit_lit(pc, &dst[0], mask, &src[0][0]);
break;
case TGSI_OPCODE_LG2:
- emit_flop(pc, 3, brdc, src[0][0]);
+ emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ {
+ struct nv50_reg *t[2];
+
+ t[0] = temp_temp(pc);
+ if (mask & (1 << 1))
+ t[1] = temp_temp(pc);
+ else
+ t[1] = t[0];
+
+ emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32);
+ emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
+ if (mask & (1 << 2))
+ emit_mov(pc, dst[2], t[1]);
+ emit_flr(pc, t[1], t[1]);
+ if (mask & (1 << 0))
+ emit_mov(pc, dst[0], t[1]);
+ if (mask & (1 << 1)) {
+ t[1]->mod = NV50_MOD_NEG;
+ emit_preex2(pc, t[1], t[1]);
+ t[1]->mod = 0;
+ emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]);
+ emit_mul(pc, dst[1], t[0], t[1]);
+ }
+ if (mask & (1 << 3))
+ emit_mov_immdval(pc, dst[3], 1.0f);
+ }
break;
case TGSI_OPCODE_LRP:
temp = temp_temp(pc);
@@ -1773,18 +2683,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+ emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_MIN:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+ emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
@@ -1802,35 +2711,58 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_pow(pc, brdc, src[0][0], src[1][0]);
break;
case TGSI_OPCODE_RCP:
- emit_flop(pc, 0, brdc, src[0][0]);
+ emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_RET:
+ if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine)
+ nv50_fp_move_results(pc);
+ emit_ret(pc, -1, 0);
break;
case TGSI_OPCODE_RSQ:
- emit_flop(pc, 2, brdc, src[0][0]);
+ src[0][0]->mod |= NV50_MOD_ABS;
+ emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
+ break;
+ case TGSI_OPCODE_SAD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+ }
break;
case TGSI_OPCODE_SCS:
temp = temp_temp(pc);
if (mask & 3)
emit_precossin(pc, temp, src[0][0]);
if (mask & (1 << 0))
- emit_flop(pc, 5, dst[0], temp);
+ emit_flop(pc, NV50_FLOP_COS, dst[0], temp);
if (mask & (1 << 1))
- emit_flop(pc, 4, dst[1], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[1], temp);
if (mask & (1 << 2))
emit_mov_immdval(pc, dst[2], 0.0);
if (mask & (1 << 3))
emit_mov_immdval(pc, dst[3], 1.0);
break;
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_USHR:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_shift(pc, dst[c], src[0][c], src[1][c],
+ inst->Instruction.Opcode);
+ }
+ break;
case TGSI_OPCODE_SIN:
if (mask & 8) {
emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, 4, dst[3], temp);
+ emit_flop(pc, NV50_FLOP_SIN, dst[3], temp);
if (!(mask &= 7))
break;
if (temp == dst[3])
temp = brdc = temp_temp(pc);
}
emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, 4, brdc, temp);
+ emit_flop(pc, NV50_FLOP_SIN, brdc, temp);
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
@@ -1838,12 +2770,23 @@ nv50_program_tx_insn(struct nv50_pc *pc,
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
- i = map_tgsi_setop_cc(inst->Instruction.Opcode);
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ {
+ uint8_t cc, ty;
+
+ map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty);
+
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
+ emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty);
}
+ }
break;
case TGSI_OPCODE_SUB:
for (c = 0; c < 4; c++) {
@@ -1854,18 +2797,54 @@ nv50_program_tx_insn(struct nv50_pc *pc,
break;
case TGSI_OPCODE_TEX:
emit_tex(pc, dst, mask, src[0], unit,
- inst->InstructionExtTexture.Texture, FALSE);
+ inst->Texture.Texture, FALSE, 0);
+ break;
+ case TGSI_OPCODE_TXB:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, -1);
+ break;
+ case TGSI_OPCODE_TXL:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->Texture.Texture, FALSE, 1);
break;
case TGSI_OPCODE_TXP:
emit_tex(pc, dst, mask, src[0], unit,
- inst->InstructionExtTexture.Texture, TRUE);
+ inst->Texture.Texture, TRUE, 0);
break;
case TGSI_OPCODE_TRUNC:
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
emit_cvt(pc, dst[c], src[0][c], -1,
- CVTOP_TRUNC, CVT_F32_F32 | CVT_RI);
+ CVT_TRUNC | CVT_F32_F32 | CVT_RI);
+ }
+ break;
+ case TGSI_OPCODE_U2F:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
+ }
+ break;
+ case TGSI_OPCODE_UADD:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_add_b32(pc, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_UMAX:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]);
+ }
+ break;
+ case TGSI_OPCODE_UMIN:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_XPD:
@@ -1886,6 +2865,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_mov_immdval(pc, dst[3], 1.0);
break;
case TGSI_OPCODE_END:
+ if (pc->p->type == PIPE_SHADER_FRAGMENT)
+ nv50_fp_move_results(pc);
+
+ /* last insn must be long so it can have the exit bit set */
+ if (!is_long(pc->p->exec_tail))
+ convert_to_long(pc, pc->p->exec_tail);
+ else
+ if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+ emit_nop(pc);
+
+ pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
@@ -1903,42 +2893,43 @@ nv50_program_tx_insn(struct nv50_pc *pc,
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- /* in this case we saturate later */
- if (dst[c]->type == P_TEMP && dst[c]->index < 0)
+ /* In this case we saturate later, and dst[c] won't
+ * be another temp_temp (and thus lost), since rdst
+ * already is TEMP (see above). */
+ if (rdst[c]->type == P_TEMP && rdst[c]->index < 0)
continue;
emit_sat(pc, rdst[c], dst[c]);
}
}
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- for (c = 0; c < 4; c++) {
- if (!src[i][c])
- continue;
- if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
- FREE(src[i][c]);
- }
- }
-
kill_temp_temp(pc);
+ pc->reg_instance_nr = 0;
+
return TRUE;
}
static void
prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
{
- struct nv50_reg *reg = NULL;
+ struct nv50_reg *r, *reg = NULL;
const struct tgsi_full_src_register *src;
const struct tgsi_dst_register *dst;
unsigned i, c, k, mask;
- dst = &insn->FullDstRegisters[0].DstRegister;
+ dst = &insn->Dst[0].Register;
mask = dst->WriteMask;
if (dst->File == TGSI_FILE_TEMPORARY)
- reg = pc->temp;
+ reg = pc->temp;
else
- if (dst->File == TGSI_FILE_OUTPUT)
- reg = pc->result;
+ if (dst->File == TGSI_FILE_OUTPUT) {
+ reg = pc->result;
+
+ if (insn->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ dst->Index == pc->edgeflag_out &&
+ insn->Src[0].Register.File == TGSI_FILE_INPUT)
+ pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index;
+ }
if (reg) {
for (c = 0; c < 4; c++) {
@@ -1949,12 +2940,12 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
}
for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
- src = &insn->FullSrcRegisters[i];
+ src = &insn->Src[i];
- if (src->SrcRegister.File == TGSI_FILE_TEMPORARY)
+ if (src->Register.File == TGSI_FILE_TEMPORARY)
reg = pc->temp;
else
- if (src->SrcRegister.File == TGSI_FILE_INPUT)
+ if (src->Register.File == TGSI_FILE_INPUT)
reg = pc->attr;
else
continue;
@@ -1964,12 +2955,17 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- k = tgsi_util_get_full_src_register_extswizzle(src, c);
+ k = tgsi_util_get_full_src_register_swizzle(src, c);
- if (k > TGSI_EXTSWIZZLE_W)
- continue;
+ r = &reg[src->Register.Index * 4 + k];
+
+ /* If used before written, pre-allocate the reg,
+ * lest we overwrite results from a subroutine.
+ */
+ if (!r->acc && r->type == P_TEMP)
+ alloc_reg(pc, r);
- reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;
+ r->acc = pc->insn_nr;
}
}
}
@@ -2029,13 +3025,13 @@ static struct nv50_reg *
tgsi_broadcast_dst(struct nv50_pc *pc,
const struct tgsi_full_dst_register *fd, unsigned mask)
{
- if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) {
- int c = ffs(~mask & fd->DstRegister.WriteMask);
+ if (fd->Register.File == TGSI_FILE_TEMPORARY) {
+ int c = ffs(~mask & fd->Register.WriteMask);
if (c)
return tgsi_dst(pc, c - 1, fd);
} else {
- int c = ffs(fd->DstRegister.WriteMask) - 1;
- if ((1 << c) == fd->DstRegister.WriteMask)
+ int c = ffs(fd->Register.WriteMask) - 1;
+ if ((1 << c) == fd->Register.WriteMask)
return tgsi_dst(pc, c, fd);
}
@@ -2049,7 +3045,7 @@ static unsigned
nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
unsigned rdep[4])
{
- const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0];
+ const struct tgsi_full_dst_register *fd = &insn->Dst[0];
const struct tgsi_full_src_register *fs;
unsigned i, deqs = 0;
@@ -2058,11 +3054,11 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
- boolean neg_supp = negate_supported(insn, i);
+ int ms = get_supported_mods(insn, i);
- fs = &insn->FullSrcRegisters[i];
- if (fs->SrcRegister.File != fd->DstRegister.File ||
- fs->SrcRegister.Index != fd->DstRegister.Index)
+ fs = &insn->Src[i];
+ if (fs->Register.File != fd->Register.File ||
+ fs->Register.Index != fd->Register.Index)
continue;
for (chn = 0; chn < 4; ++chn) {
@@ -2070,17 +3066,18 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
if (!(mask & (1 << chn))) /* src is not read */
continue;
- c = tgsi_util_get_full_src_register_extswizzle(fs, chn);
+ c = tgsi_util_get_full_src_register_swizzle(fs, chn);
s = tgsi_util_get_full_src_register_sign_mode(fs, chn);
- if (c > TGSI_EXTSWIZZLE_W ||
- !(fd->DstRegister.WriteMask & (1 << c)))
+ if (!(fd->Register.WriteMask & (1 << c)))
continue;
- /* no danger if src is copied to TEMP first */
- if ((s != TGSI_UTIL_SIGN_KEEP) &&
- (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp))
- continue;
+ if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG))
+ continue;
+ if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS))
+ continue;
+ if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3))
+ continue;
rdep[c] |= nv50_tgsi_dst_revdep(
insn->Instruction.Opcode, i, chn);
@@ -2098,7 +3095,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
const struct tgsi_full_dst_register *fd;
unsigned i, deqs, rdep[4], m[4];
- fd = &tok->FullInstruction.FullDstRegisters[0];
+ fd = &tok->FullInstruction.Dst[0];
deqs = nv50_tgsi_scan_swizzle(&insn, rdep);
if (is_scalar_op(insn.Instruction.Opcode)) {
@@ -2109,7 +3106,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
pc->r_brdc = NULL;
- if (!deqs)
+ if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3]))
return nv50_program_tx_insn(pc, &insn);
deqs = nv50_revdep_reorder(m, rdep);
@@ -2117,10 +3114,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
for (i = 0; i < 4; ++i) {
assert(pc->r_dst[m[i]] == NULL);
- insn.FullDstRegisters[0].DstRegister.WriteMask =
- fd->DstRegister.WriteMask & (1 << m[i]);
+ insn.Dst[0].Register.WriteMask =
+ fd->Register.WriteMask & (1 << m[i]);
- if (!insn.FullDstRegisters[0].DstRegister.WriteMask)
+ if (!insn.Dst[0].Register.WriteMask)
continue;
if (deqs & (1 << i))
@@ -2160,7 +3157,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
- emit_flop(pc, 0, iv, iv);
+ emit_flop(pc, NV50_FLOP_RCP, iv, iv);
/* XXX: when loading interpolants dynamically, move these
* to the program head, or make sure it can't be skipped.
@@ -2170,6 +3167,23 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
emit_interp(pc, reg, iv, mode);
}
+/* The face input is always at v[255] (varying space), with a
+ * value of 0 for back-facing, and 0xffffffff for front-facing.
+ */
+static void
+load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a)
+{
+ struct nv50_reg *one = alloc_immd(pc, 1.0f);
+
+ assert(a->rhw == -1);
+ alloc_reg(pc, a); /* do this before rhw is set */
+ a->rhw = 255;
+ load_interpolant(pc, a);
+ emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND);
+
+ FREE(one);
+}
+
static boolean
nv50_program_tx_prep(struct nv50_pc *pc)
{
@@ -2189,10 +3203,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
const struct tgsi_full_immediate *imm =
&tp.FullToken.FullImmediate;
- ctor_immd(pc, imm->u[0].Float,
- imm->u[1].Float,
- imm->u[2].Float,
- imm->u[3].Float);
+ ctor_immd_4f32(pc, imm->u[0].Float,
+ imm->u[1].Float,
+ imm->u[2].Float,
+ imm->u[3].Float);
}
break;
case TGSI_TOKEN_TYPE_DECLARATION:
@@ -2201,8 +3215,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
unsigned si, last, first, mode;
d = &tp.FullToken.FullDeclaration;
- first = d->DeclarationRange.First;
- last = d->DeclarationRange.Last;
+ first = d->Range.First;
+ last = d->Range.Last;
switch (d->Declaration.File) {
case TGSI_FILE_TEMPORARY:
@@ -2212,8 +3226,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
p->type == PIPE_SHADER_FRAGMENT)
break;
- si = d->Semantic.SemanticIndex;
- switch (d->Semantic.SemanticName) {
+ si = d->Semantic.Index;
+ switch (d->Semantic.Name) {
case TGSI_SEMANTIC_BCOLOR:
p->cfg.two_side[si].hw = first;
if (p->cfg.io_nr > first)
@@ -2224,6 +3238,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (p->cfg.io_nr > first)
p->cfg.io_nr = first;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ pc->edgeflag_out = first;
+ break;
/*
case TGSI_SEMANTIC_CLIP_DISTANCE:
p->cfg.clpd = MIN2(p->cfg.clpd, first);
@@ -2259,8 +3276,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
pc->interp_mode[i] = mode;
}
break;
+ case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
- break;
case TGSI_FILE_SAMPLER:
break;
default:
@@ -2291,7 +3308,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
for (i = 0, rid = 0; i < pc->result_nr; ++i) {
p->cfg.io[i].hw = rid;
- p->cfg.io[i].id_vp = i;
+ p->cfg.io[i].id = i;
for (c = 0; c < 4; ++c) {
int n = i * 4 + c;
@@ -2314,6 +3331,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
int rid, aid;
unsigned n = 0, m = pc->attr_nr - flat_nr;
+ pc->allow32 = TRUE;
+
int base = (TGSI_SEMANTIC_POSITION ==
p->info.input_semantic_name[0]) ? 0 : 1;
@@ -2321,14 +3340,12 @@ nv50_program_tx_prep(struct nv50_pc *pc)
* the lower hardware IDs, so sort them:
*/
for (i = 0; i < pc->attr_nr; i++) {
- if (pc->interp_mode[i] == INTERP_FLAT) {
- p->cfg.io[m].id_vp = i + base;
- p->cfg.io[m++].id_fp = i;
- } else {
+ if (pc->interp_mode[i] == INTERP_FLAT)
+ p->cfg.io[m++].id = i;
+ else {
if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
p->cfg.io[n].linear = TRUE;
- p->cfg.io[n].id_vp = i + base;
- p->cfg.io[n++].id_fp = i;
+ p->cfg.io[n++].id = i;
}
}
@@ -2340,7 +3357,13 @@ nv50_program_tx_prep(struct nv50_pc *pc)
for (n = 0; n < pc->attr_nr; ++n) {
p->cfg.io[n].hw = rid = aid;
- i = p->cfg.io[n].id_fp;
+ i = p->cfg.io[n].id;
+
+ if (p->info.input_semantic_name[n] ==
+ TGSI_SEMANTIC_FACE) {
+ load_frontfacing(pc, &pc->attr[i * 4]);
+ continue;
+ }
for (c = 0; c < 4; ++c) {
if (!pc->attr[i * 4 + c].acc)
@@ -2374,8 +3397,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
for (i = 0; i < pc->attr_nr; i++) {
ubyte si, sn;
- sn = p->info.input_semantic_name[p->cfg.io[i].id_fp];
- si = p->info.input_semantic_index[p->cfg.io[i].id_fp];
+ sn = p->info.input_semantic_name[p->cfg.io[i].id];
+ si = p->info.input_semantic_index[p->cfg.io[i].id];
if (sn == TGSI_SEMANTIC_COLOR) {
p->cfg.two_side[si] = p->cfg.io[i];
@@ -2400,6 +3423,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
pc->result[2].rhw = rid;
p->cfg.high_result = rid;
+
+ /* separate/different colour results for MRTs ? */
+ if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1)
+ p->cfg.regs[2] |= 1;
}
if (pc->immd_nr) {
@@ -2454,12 +3481,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
+ assert(pc->addr_nr <= 2);
p->cfg.high_temp = 4;
p->cfg.two_side[0].hw = 0x40;
p->cfg.two_side[1].hw = 0x40;
+ p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+
switch (p->type) {
case PIPE_SHADER_VERTEX:
p->cfg.psiz = 0x40;
@@ -2522,31 +3553,21 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
ctor_reg(&pc->param[rid], P_CONST, i, rid);
}
- return TRUE;
-}
-
-static void
-nv50_fp_move_results(struct nv50_pc *pc)
-{
- struct nv50_reg reg;
- unsigned i;
-
- ctor_reg(&reg, P_TEMP, -1, -1);
-
- for (i = 0; i < pc->result_nr * 4; ++i) {
- if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
- continue;
- if (pc->result[i].rhw != pc->result[i].hw) {
- reg.hw = pc->result[i].rhw;
- emit_mov(pc, &reg, &pc->result[i]);
- }
+ if (pc->addr_nr) {
+ pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
+ if (!pc->addr)
+ return FALSE;
}
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+ ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1);
+
+ return TRUE;
}
static void
nv50_program_fixup_insns(struct nv50_pc *pc)
{
- struct nv50_program_exec *e, *prev = NULL, **bra_list;
+ struct nv50_program_exec *e, **bra_list;
unsigned i, n, pos;
bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *));
@@ -2566,27 +3587,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
for (i = 0; i < n; ++i)
if (bra_list[i]->param.index >= pos)
bra_list[i]->param.index += 1;
+ for (i = 0; i < pc->insn_nr; ++i)
+ if (pc->insn_pos[i] >= pos)
+ pc->insn_pos[i] += 1;
convert_to_long(pc, e);
++pos;
}
- if (e->next)
- prev = e;
}
- assert(!is_immd(pc->p->exec_head));
- assert(!is_immd(pc->p->exec_tail));
+ FREE(bra_list);
- /* last instruction must be long so it can have the end bit set */
- if (!is_long(pc->p->exec_tail)) {
- convert_to_long(pc, pc->p->exec_tail);
- if (prev)
- convert_to_long(pc, prev);
- }
- assert(!(pc->p->exec_tail->inst[1] & 2));
- /* set the end-bit */
- pc->p->exec_tail->inst[1] |= 1;
+ if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL])
+ return;
- FREE(bra_list);
+ /* fill in CALL offsets */
+ for (e = pc->p->exec_head; e; e = e->next) {
+ if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2)
+ e->param.index = pc->insn_pos[e->param.index];
+ }
}
static boolean
@@ -2608,19 +3626,20 @@ nv50_program_tx(struct nv50_program *p)
if (ret == FALSE)
goto out_cleanup;
+ pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned));
+
tgsi_parse_init(&parse, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
const union tgsi_full_token *tok = &parse.FullToken;
- /* don't allow half insn/immd on first and last instruction */
+ /* previously allow32 was FALSE for first & last instruction */
pc->allow32 = TRUE;
- if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
- pc->allow32 = FALSE;
tgsi_parse_token(&parse);
switch (tok->Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ pc->insn_pos[pc->insn_cur] = pc->p->exec_size;
++pc->insn_cur;
ret = nv50_tgsi_insn(pc, tok);
if (ret == FALSE)
@@ -2631,9 +3650,6 @@ nv50_program_tx(struct nv50_program *p)
}
}
- if (pc->p->type == PIPE_SHADER_FRAGMENT)
- nv50_fp_move_results(pc);
-
nv50_program_fixup_insns(pc);
p->param_nr = pc->param_nr * 4;
@@ -2657,7 +3673,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
}
static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count, unsigned cbuf)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -2701,12 +3717,12 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
p->immd_nr, NV50_CB_PMISC);
}
- assert(p->param_nr <= 128);
+ assert(p->param_nr <= 512);
if (p->param_nr) {
unsigned cb;
- float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
- PIPE_BUFFER_USAGE_CPU_READ);
+ uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+ PIPE_BUFFER_USAGE_CPU_READ);
if (p->type == PIPE_SHADER_VERTEX)
cb = NV50_CB_PVP;
@@ -2722,11 +3738,8 @@ static void
nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program_exec *e;
- struct nouveau_stateobj *so;
- const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
- unsigned start, count, *up, *ptr;
+ uint32_t *up, i;
boolean upload = FALSE;
if (!p->bo) {
@@ -2741,32 +3754,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
if (!upload)
return;
- for (e = p->exec_head; e; e = e->next) {
+ up = MALLOC(p->exec_size * 4);
+
+ for (i = 0, e = p->exec_head; e; e = e->next) {
unsigned ei, ci, bs;
- if (e->param.index < 0)
- continue;
+ if (e->param.index >= 0 && e->param.mask) {
+ bs = (e->inst[1] >> 22) & 0x07;
+ assert(bs < 2);
+ ei = e->param.shift >> 5;
+ ci = e->param.index;
+ if (bs == 0)
+ ci += p->data[bs]->start;
- if (e->param.mask == 0) {
+ e->inst[ei] &= ~e->param.mask;
+ e->inst[ei] |= (ci << e->param.shift);
+ } else
+ if (e->param.index >= 0) {
+ /* zero mask means param is a jump/branch offset */
assert(!(e->param.index & 1));
/* seem to be 8 byte steps */
ei = (e->param.index >> 1) + 0 /* START_ID */;
e->inst[0] &= 0xf0000fff;
e->inst[0] |= ei << 12;
- continue;
}
- bs = (e->inst[1] >> 22) & 0x07;
- assert(bs < 2);
- ei = e->param.shift >> 5;
- ci = e->param.index;
- if (bs == 0)
- ci += p->data[bs]->start;
-
- e->inst[ei] &= ~e->param.mask;
- e->inst[ei] |= (ci << e->param.shift);
+ up[i++] = e->inst[0];
+ if (is_long(e))
+ up[i++] = e->inst[1];
}
+ assert(i == p->exec_size);
if (p->data[0])
p->data_start[0] = p->data[0]->start;
@@ -2779,45 +3797,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
NOUVEAU_ERR("0x%08x\n", e->inst[1]);
}
#endif
-
- up = ptr = MALLOC(p->exec_size * 4);
- for (e = p->exec_head; e; e = e->next) {
- *(ptr++) = e->inst[0];
- if (is_long(e))
- *(ptr++) = e->inst[1];
- }
-
- so = so_new(4,2);
- so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
-
- start = 0; count = p->exec_size;
- while (count) {
- struct nouveau_channel *chan = nv50->screen->base.channel;
- unsigned nr;
-
- so_emit(chan, so);
-
- nr = MIN2(count, 2047);
- nr = MIN2(chan->pushbuf->remaining, nr);
- if (chan->pushbuf->remaining < (nr + 3)) {
- FIRE_RING(chan);
- continue;
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
- OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
- BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
- OUT_RINGp (chan, up + start, nr);
-
- start += nr;
- count -= nr;
- }
+ nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM,
+ NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
+ up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0,
+ 0, 0, p->exec_size * 4, 1, 1);
FREE(up);
- so_ref(NULL, &so);
}
void
@@ -2836,7 +3821,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(13, 2);
+ so = so_new(5, 8, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -2872,7 +3857,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(64, 2);
+ so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -2882,7 +3867,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
+ so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
so_data (so, p->cfg.regs[2]);
so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
so_data (so, p->cfg.regs[3]);
@@ -2899,15 +3884,15 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
struct nv50_program *vp = nv50->vertprog;
unsigned i, c, m = base;
- /* XXX: This can't work correctly in all cases yet, we either
- * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has
- * to be per FP input instead of per VP output
+ /* XXX: this might not work correctly in all cases yet - we'll
+ * just assume that an FP generic input that is not written in
+ * the VP is PointCoord.
*/
memset(pntc, 0, 8 * sizeof(uint32_t));
for (i = 0; i < fp->cfg.io_nr; i++) {
uint8_t sn, si;
- uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp;
+ uint8_t j, k = fp->cfg.io[i].id;
unsigned n = popcnt4(fp->cfg.io[i].mask);
if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) {
@@ -2915,10 +3900,16 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
continue;
}
- sn = vp->info.input_semantic_name[j];
- si = vp->info.input_semantic_index[j];
+ for (j = 0; j < vp->info.num_outputs; ++j) {
+ sn = vp->info.output_semantic_name[j];
+ si = vp->info.output_semantic_index[j];
- if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) {
+ if (sn == fp->info.input_semantic_name[k] &&
+ si == fp->info.input_semantic_index[k])
+ break;
+ }
+
+ if (j < vp->info.num_outputs) {
ubyte mode =
nv50->rasterizer->pipe.sprite_coord_mode[si];
@@ -3006,20 +3997,24 @@ nv50_linkage_validate(struct nv50_context *nv50)
reg[0] += m - 4; /* adjust FFC0 id */
reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
- i = 0;
- if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION)
- i = 1;
- for (; i < fp->cfg.io_nr; i++) {
- ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp];
- ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp];
-
- n = fp->cfg.io[i].id_vp;
- if (n >= vp->cfg.io_nr ||
- vp->info.output_semantic_name[n] != sn ||
- vp->info.output_semantic_index[n] != si)
- vpo = &dummy;
- else
- vpo = &vp->cfg.io[n];
+ for (i = 0; i < fp->cfg.io_nr; i++) {
+ ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id];
+ ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id];
+
+ /* position must be mapped first */
+ assert(i == 0 || sn != TGSI_SEMANTIC_POSITION);
+
+ /* maybe even remove these from cfg.io */
+ if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE)
+ continue;
+
+ /* VP outputs and vp->cfg.io are in the same order */
+ for (n = 0; n < vp->info.num_outputs; ++n) {
+ if (vp->info.output_semantic_name[n] == sn &&
+ vp->info.output_semantic_index[n] == si)
+ break;
+ }
+ vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy;
m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
}
@@ -3030,7 +4025,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
}
/* now fill the stateobj */
- so = so_new(64, 0);
+ so = so_new(6, 58, 0);
n = (m + 3) / 4;
so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
@@ -3044,7 +4039,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
so_data (so, reg[4]);
- so_method(so, tesla, 0x1540, 4);
+ so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
so_datap (so, lin, 4);
if (nv50->rasterizer->pipe.point_sprite) {
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index d78dee083f1..461fec1d89c 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -17,8 +17,7 @@ struct nv50_program_exec {
struct nv50_sreg4 {
uint8_t hw;
- uint8_t id_vp;
- uint8_t id_fp;
+ uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */
uint8_t mask;
boolean linear;
@@ -38,7 +37,7 @@ struct nv50_program {
struct nouveau_bo *bo;
- float *immd;
+ uint32_t *immd;
unsigned immd_nr;
unsigned param_nr;
@@ -59,6 +58,7 @@ struct nv50_program {
/* VP only */
uint8_t clpd, clpd_nr;
uint8_t psiz;
+ uint8_t edgeflag_in;
} cfg;
};
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5305c93d59a..5d9e18218ae 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- BEGIN_RING(chan, tesla, 0x1530, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1);
OUT_RING (chan, 1);
- BEGIN_RING(chan, tesla, 0x1514, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
OUT_RING (chan, 1);
q->ready = FALSE;
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_query *q = nv50_query(pq);
- WAIT_RING (chan, 5);
+ MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index dd7baecba7c..28e2b35deaa 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -38,6 +38,11 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
return TRUE;
default:
break;
@@ -57,6 +62,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
switch (format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_SRGB:
+ case PIPE_FORMAT_X8R8G8B8_SRGB:
case PIPE_FORMAT_A1R5G5B5_UNORM:
case PIPE_FORMAT_A4R4G4B4_UNORM:
case PIPE_FORMAT_R5G6B5_UNORM:
@@ -68,6 +75,14 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
case PIPE_FORMAT_DXT1_RGBA:
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
return TRUE;
default:
break;
@@ -83,6 +98,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 32;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 32;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -108,10 +127,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 0;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
+ return 1;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case NOUVEAU_CAP_HW_VTXBUF:
@@ -148,6 +165,21 @@ static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
struct nv50_screen *screen = nv50_screen(pscreen);
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ if (screen->constbuf_parm[i])
+ nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
+ }
+
+ if (screen->constbuf_misc[0])
+ nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
+ if (screen->tic)
+ nouveau_bo_ref(NULL, &screen->tic);
+ if (screen->tsc)
+ nouveau_bo_ref(NULL, &screen->tsc);
+ if (screen->static_init)
+ so_ref(NULL, &screen->static_init);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->tesla);
@@ -157,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
FREE(screen);
}
+static int
+nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
+ unsigned usage)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *ctx = screen->cur_ctx;
+
+ if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
+ return 0;
+
+ /* Our vtxbuf got mapped, it can no longer be considered part of current
+ * state, remove it to avoid emitting reloc markers.
+ */
+ if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
+ nouveau_bo(pb))) {
+ so_ref(NULL, &ctx->state.vtxbuf);
+ ctx->dirty |= NV50_NEW_ARRAYS;
+ }
+
+ return 0;
+}
+
struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
@@ -184,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->is_format_supported = nv50_screen_is_format_supported;
+ screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
nv50_screen_init_miptree_functions(pscreen);
nv50_transfer_init_screen_functions(pscreen);
@@ -196,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->m2mf, 1);
/* 2D object */
ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
@@ -205,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->eng2d, 2);
/* 3D object */
switch (chipset & 0xf0) {
@@ -214,11 +267,19 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
break;
case 0x80:
case 0x90:
- /* this stupid name should be corrected. */
- tesla_class = NV54TCL;
+ tesla_class = NV84TCL;
break;
case 0xa0:
- tesla_class = NVA0TCL;
+ switch (chipset) {
+ case 0xa0:
+ case 0xaa:
+ case 0xac:
+ tesla_class = NVA0TCL;
+ break;
+ default:
+ tesla_class = NVA8TCL;
+ break;
+ }
break;
default:
NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
@@ -226,12 +287,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- if (tesla_class == 0) {
- NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
- nv50_screen_destroy(pscreen);
- return NULL;
- }
-
ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class,
&screen->tesla);
if (ret) {
@@ -239,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
- BIND_RING(chan, screen->tesla, 3);
/* Sync notifier */
ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
@@ -250,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static M2MF init */
- so = so_new(32, 0);
+ so = so_new(1, 3, 0);
so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
so_data (so, screen->sync->handle);
so_data (so, chan->vram->handle);
@@ -259,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref (NULL, &so);
/* Static 2D init */
- so = so_new(64, 0);
+ so = so_new(4, 7, 0);
so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
so_data (so, screen->sync->handle);
so_data (so, chan->vram->handle);
@@ -267,7 +321,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, chan->vram->handle);
so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
so_data (so, NV50_2D_OPERATION_SRCCOPY);
- so_method(so, screen->eng2d, 0x0290, 1);
+ so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
so_data (so, 0);
so_method(so, screen->eng2d, 0x0888, 1);
so_data (so, 1);
@@ -275,33 +329,41 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref(NULL, &so);
/* Static tesla init */
- so = so_new(256, 20);
+ so = so_new(40, 84, 20);
- so_method(so, screen->tesla, 0x1558, 1);
- so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
+ so_data (so, NV50TCL_COND_MODE_ALWAYS);
so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
so_data (so, screen->sync->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
- NV50TCL_DMA_UNK0__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+ for (i = 0; i < 11; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
- NV50TCL_DMA_UNK1__SIZE);
- for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+ so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
+ NV50TCL_DMA_COLOR__SIZE);
+ for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, 0x121c, 1);
+ so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
so_data (so, 1);
- so_method(so, screen->tesla, 0x13bc, 1);
+ /* activate all 32 lanes (threads) in a warp */
+ so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
+ so_data (so, 0x2);
+ so_method(so, screen->tesla, 0x1400, 1);
+ so_data (so, 0xf);
+
+ /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1);
+ so_data (so, 0x54);
+ so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1);
so_data (so, 0x54);
/* origin is top left (set to 1 for bottom left) */
- so_method(so, screen->tesla, 0x13ac, 1);
+ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
so_data (so, 0);
so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
so_data (so, 8);
/* constant buffers for immediates and VP/FP parameters */
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
&screen->constbuf_misc[0]);
if (ret) {
nv50_screen_destroy(pscreen);
@@ -309,7 +371,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
for (i = 0; i < 2; i++) {
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
&screen->constbuf_parm[i]);
if (ret) {
nv50_screen_destroy(pscreen);
@@ -318,8 +380,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
- nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
- nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+ nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
+ nouveau_resource_init(&screen->parm_heap[1], 0, 512))
{
NOUVEAU_ERR("Error initialising constant buffers.\n");
nv50_screen_destroy(pscreen);
@@ -331,7 +393,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
// B = buffer ID (maybe more than 1 byte)
// N = CB index used in shader instruction
// P = program type (0 = VP, 2 = GP, 3 = FP)
- so_method(so, screen->tesla, 0x1694, 1);
+ so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x000BBNP1);
*/
@@ -340,7 +402,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PMISC << 16) | 0x00000800);
+ so_data (so, (NV50_CB_PMISC << 16) | 0x00000200);
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
@@ -364,68 +426,57 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
- /* Texture sampler/image unit setup - we abuse the constant buffer
- * upload mechanism for the moment to upload data to the tex config
- * blocks. At some point we *may* want to go the NVIDIA way of doing
- * things?
- */
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tic);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tic);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_TIC << 16) | 0x0800);
so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x00000800);
+ so_data (so, PIPE_SHADER_TYPES * 32 - 1);
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tsc);
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ &screen->tsc);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_TSC << 16) | 0x0800);
so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x00000800);
+ so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
- so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
+ so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_data (so, 0x000000ff);
so_data (so, 0xffffffff);
}
- so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+ so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
so_data (so, fui(0.0));
so_data (so, fui(1.0));
- so_method(so, screen->tesla, 0x1234, 1);
+ /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
+ so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
so_data (so, 1);
/* activate first scissor rectangle */
- so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
so_data (so, 1);
+ so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, 1); /* default edgeflag to TRUE */
+
so_emit(chan, so);
so_ref (so, &screen->static_init);
so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 61e24a5b571..a038a4e3c2a 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -2,6 +2,7 @@
#define __NV50_SCREEN_H__
#include "nouveau/nouveau_screen.h"
+#include "nv50_context.h"
struct nv50_screen {
struct nouveau_screen base;
@@ -9,6 +10,7 @@ struct nv50_screen {
struct nouveau_winsys *nvws;
unsigned cur_pctx;
+ struct nv50_context *cur_ctx;
struct nouveau_grobj *tesla;
struct nouveau_grobj *eng2d;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 81fa3e34c59..1f67df814b1 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -35,7 +35,7 @@ static void *
nv50_blend_state_create(struct pipe_context *pipe,
const struct pipe_blend_state *cso)
{
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(5, 24, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
unsigned cmask = 0, i;
@@ -183,25 +183,20 @@ nv50_sampler_state_create(struct pipe_context *pipe,
else
if (cso->max_anisotropy >= 12.0)
tsc[0] |= (6 << 20);
- else
- if (cso->max_anisotropy >= 10.0)
- tsc[0] |= (5 << 20);
- else
- if (cso->max_anisotropy >= 8.0)
- tsc[0] |= (4 << 20);
- else
- if (cso->max_anisotropy >= 6.0)
- tsc[0] |= (3 << 20);
- else
- if (cso->max_anisotropy >= 4.0)
- tsc[0] |= (2 << 20);
- else
- if (cso->max_anisotropy >= 2.0)
- tsc[0] |= (1 << 20);
+ else {
+ tsc[0] |= (int)(cso->max_anisotropy * 0.5f) << 20;
+
+ if (cso->max_anisotropy >= 4.0)
+ tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35;
+ else
+ if (cso->max_anisotropy >= 2.0)
+ tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15;
+ }
if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
- tsc[0] |= (1 << 8);
- tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+ /* XXX: must be deactivated for non-shadow textures */
+ tsc[0] |= (1 << 9);
+ tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
}
limit = CLAMP(cso->lod_bias, -16.0, 15.0);
@@ -219,46 +214,71 @@ nv50_sampler_state_create(struct pipe_context *pipe,
return (void *)sso;
}
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+ unsigned nr, void **sampler)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
- nv50->sampler_nr = nr;
- for (i = 0; i < nv50->sampler_nr; i++)
- nv50->sampler[i] = sampler[i];
+ memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
+ nv50->sampler_nr[type] = nr;
nv50->dirty |= NV50_NEW_SAMPLER;
}
static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
+static void
nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
FREE(hwcso);
}
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
- struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+ unsigned nr, struct pipe_texture **pt)
{
struct nv50_context *nv50 = nv50_context(pipe);
- int i;
+ unsigned i;
for (i = 0; i < nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
- for (i = nr; i < nv50->miptree_nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+ pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+ for (i = nr; i < nv50->miptree_nr[type]; i++)
+ pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
- nv50->miptree_nr = nr;
+ nv50->miptree_nr[type] = nr;
nv50->dirty |= NV50_NEW_TEXTURE;
}
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+ unsigned nr, struct pipe_texture **pt)
+{
+ nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(15, 21, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_rasterizer_stateobj *rso =
CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -273,7 +293,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
NV50TCL_SHADE_MODEL_SMOOTH);
- so_method(so, tesla, 0x1684, 1);
+ so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1);
so_data (so, cso->flatshade_first ? 0 : 1);
so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
@@ -370,7 +390,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
so_data (so, fui(cso->offset_scale));
so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
- so_data (so, fui(cso->offset_units));
+ so_data (so, fui(cso->offset_units * 2.0f));
}
rso->pipe = *cso;
@@ -403,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
{
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
- struct nouveau_stateobj *so = so_new(64, 0);
+ struct nouveau_stateobj *so = so_new(8, 22, 0);
so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
so_data (so, cso->depth.writemask ? 1 : 0);
@@ -417,9 +437,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, 0);
}
- /* XXX: keep hex values until header is updated (names reversed) */
if (cso->stencil[0].enabled) {
- so_method(so, tesla, 0x1380, 8);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
@@ -429,23 +448,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, cso->stencil[0].writemask);
so_data (so, cso->stencil[0].valuemask);
} else {
- so_method(so, tesla, 0x1380, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}
if (cso->stencil[1].enabled) {
- so_method(so, tesla, 0x1594, 5);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
so_data (so, 1);
so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
- so_method(so, tesla, 0x0f54, 3);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
so_data (so, cso->stencil[1].ref_value);
so_data (so, cso->stencil[1].writemask);
so_data (so, cso->stencil[1].valuemask);
} else {
- so_method(so, tesla, 0x1594, 1);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}
@@ -652,9 +671,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.delete_blend_state = nv50_blend_state_delete;
nv50->pipe.create_sampler_state = nv50_sampler_state_create;
- nv50->pipe.bind_sampler_states = nv50_sampler_state_bind;
nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
- nv50->pipe.set_sampler_textures = nv50_set_sampler_texture;
+ nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+ nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind;
+ nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+ nv50->pipe.set_vertex_sampler_textures = nv50_set_vp_sampler_textures;
nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index fd276203710..f83232f43cf 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -23,14 +23,29 @@
#include "nv50_context.h"
#include "nouveau/nouveau_stateobj.h"
+#define NV50_CBUF_FORMAT_CASE(n) \
+ case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break
+
+#define NV50_ZETA_FORMAT_CASE(n) \
+ case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break
+
static void
nv50_state_validate_fb(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so = so_new(128, 18);
+ struct nouveau_stateobj *so = so_new(32, 79, 18);
struct pipe_framebuffer_state *fb = &nv50->framebuffer;
unsigned i, w, h, gw = 0;
+ /* Set nr of active RTs and select RT for each colour output.
+ * FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
+ * Ambiguous assignment results in no rendering (no DATA_ERROR).
+ */
+ so_method(so, tesla, NV50TCL_RT_CONTROL, 1);
+ so_data (so, fb->nr_cbufs |
+ (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) |
+ (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
+
for (i = 0; i < fb->nr_cbufs; i++) {
struct pipe_texture *pt = fb->cbufs[i]->texture;
struct nouveau_bo *bo = nv50_miptree(pt)->base.bo;
@@ -54,15 +69,14 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->cbufs[i]->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
- break;
+ NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM);
+ NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM);
+ NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM);
+ NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM);
+ NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM);
+ NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT);
+ NV50_CBUF_FORMAT_CASE(R16G16_SNORM);
+ NV50_CBUF_FORMAT_CASE(R16G16_UNORM);
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
pf_name(fb->cbufs[i]->format));
@@ -73,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1224, 1);
+ so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
so_data (so, 1);
}
@@ -96,18 +110,10 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
switch (fb->zsbuf->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
- break;
- case PIPE_FORMAT_X8Z24_UNORM:
- so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
- break;
+ NV50_ZETA_FORMAT_CASE(S8Z24_UNORM);
+ NV50_ZETA_FORMAT_CASE(X8Z24_UNORM);
+ NV50_ZETA_FORMAT_CASE(Z24S8_UNORM);
+ NV50_ZETA_FORMAT_CASE(Z32_FLOAT);
default:
NOUVEAU_ERR("AIIII unknown format %s\n",
pf_name(fb->zsbuf->format));
@@ -118,19 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
- so_method(so, tesla, 0x1538, 1);
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 1);
so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
so_data (so, fb->zsbuf->width);
so_data (so, fb->zsbuf->height);
so_data (so, 0x00010001);
+ } else {
+ so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
+ so_data (so, 0);
}
- so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+ so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
so_data (so, w << 16);
so_data (so, h << 16);
/* set window lower left corner */
- so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
+ so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2);
so_data (so, 0);
so_data (so, 0);
/* set screen scissor rectangle */
@@ -147,11 +156,38 @@ nv50_state_validate_fb(struct nv50_context *nv50)
}
static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
+{
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+ if (!dw)
+ return;
+ nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+ p * (32 * 8 * 4), dw * 4);
+
+ so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+ for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+ if (nv50->sampler[p][i])
+ so_datap(so, nv50->sampler[p][i]->tsc, 8);
+ else {
+ for (j = 0; j < 8; ++j) /* you get punished */
+ so_data(so, 0); /* ... for leaving holes */
+ }
+ }
+}
+
+static void
nv50_state_emit(struct nv50_context *nv50)
{
struct nv50_screen *screen = nv50->screen;
struct nouveau_channel *chan = screen->base.channel;
+ /* I don't want to copy headers from the winsys. */
+ screen->cur_ctx = nv50;
+
if (nv50->pctx_id != screen->cur_pctx) {
if (nv50->state.fb)
nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
@@ -192,7 +228,8 @@ nv50_state_emit(struct nv50_context *nv50)
so_emit(chan, nv50->state.vertprog);
if (nv50->state.dirty & NV50_NEW_FRAGPROG)
so_emit(chan, nv50->state.fragprog);
- if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+ if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+ NV50_NEW_RASTERIZER))
so_emit(chan, nv50->state.programs);
if (nv50->state.dirty & NV50_NEW_RASTERIZER)
so_emit(chan, nv50->state.rast);
@@ -222,6 +259,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
{
struct nv50_context *nv50 = chan->user_private;
+ if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
+ so_emit(chan, nv50->state.tic_upload);
+
so_emit_reloc_markers(chan, nv50->state.fb);
so_emit_reloc_markers(chan, nv50->state.vertprog);
so_emit_reloc_markers(chan, nv50->state.fragprog);
@@ -251,14 +291,15 @@ nv50_state_validate(struct nv50_context *nv50)
if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
nv50_fragprog_validate(nv50);
- if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+ if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+ NV50_NEW_RASTERIZER))
nv50_linkage_validate(nv50);
if (nv50->dirty & NV50_NEW_RASTERIZER)
so_ref(nv50->rasterizer->so, &nv50->state.rast);
if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
- so = so_new(5, 0);
+ so = so_new(1, 4, 0);
so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
so_data (so, fui(nv50->blend_colour.color[0]));
so_data (so, fui(nv50->blend_colour.color[1]));
@@ -269,10 +310,10 @@ nv50_state_validate(struct nv50_context *nv50)
}
if (nv50->dirty & NV50_NEW_STIPPLE) {
- so = so_new(33, 0);
+ so = so_new(1, 32, 0);
so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
for (i = 0; i < 32; i++)
- so_data(so, nv50->stipple.stipple[i]);
+ so_data(so, util_bswap32(nv50->stipple.stipple[i]));
so_ref(so, &nv50->state.stipple);
so_ref(NULL, &so);
}
@@ -286,8 +327,8 @@ nv50_state_validate(struct nv50_context *nv50)
goto scissor_uptodate;
nv50->state.scissor_enabled = rast->scissor;
- so = so_new(3, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
+ so = so_new(1, 2, 0);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
if (nv50->state.scissor_enabled) {
so_data(so, (s->maxx << 16) | s->minx);
so_data(so, (s->maxy << 16) | s->miny);
@@ -315,13 +356,13 @@ scissor_uptodate:
goto viewport_uptodate;
nv50->state.viewport_bypass = bypass;
- so = so_new(14, 0);
+ so = so_new(5, 9, 0);
if (!bypass) {
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
so_data (so, fui(nv50->viewport.translate[0]));
so_data (so, fui(nv50->viewport.translate[1]));
so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
so_data (so, fui(nv50->viewport.scale[0]));
so_data (so, fui(nv50->viewport.scale[1]));
so_data (so, fui(nv50->viewport.scale[2]));
@@ -354,18 +395,20 @@ scissor_uptodate:
viewport_uptodate:
if (nv50->dirty & NV50_NEW_SAMPLER) {
- int i;
+ unsigned nr = 0;
- so = so_new(nv50->sampler_nr * 9 + 2, 0);
- so_method(so, tesla, NV50TCL_CB_ADDR, 1);
- so_data (so, NV50_CB_TSC);
- for (i = 0; i < nv50->sampler_nr; i++) {
- if (!nv50->sampler[i])
- continue;
+ for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+ nr += nv50->sampler_nr[i];
+
+ so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES
+ + nr * 8, PIPE_SHADER_TYPES * 2);
+
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+ nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+
+ so_method(so, tesla, 0x1334, 1); /* flush TSC */
+ so_data (so, 0);
- so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8);
- so_datap (so, nv50->sampler[i]->tsc, 8);
- }
so_ref(so, &nv50->state.tsc_upload);
so_ref(NULL, &so);
}
@@ -383,3 +426,36 @@ viewport_uptodate:
return TRUE;
}
+void nv50_so_init_sifc(struct nv50_context *nv50,
+ struct nouveau_stateobj *so,
+ struct nouveau_bo *bo, unsigned reloc,
+ unsigned offset, unsigned size)
+{
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+
+ reloc |= NOUVEAU_BO_WR;
+
+ so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
+ so_data (so, NV50_2D_DST_FORMAT_R8_UNORM);
+ so_data (so, 1);
+ so_method(so, eng2d, NV50_2D_DST_PITCH, 5);
+ so_data (so, 262144);
+ so_data (so, 65536);
+ so_data (so, 1);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
+ so_data (so, 0);
+ so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
+ so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
+ so_data (so, size);
+ so_data (so, 1);
+ so_data (so, 0);
+ so_data (so, 1);
+ so_data (so, 0);
+ so_data (so, 1);
+ so_data (so, 0);
+ so_data (so, 0);
+ so_data (so, 0);
+ so_data (so, 0);
+}
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 6bf6f773b0c..6378132979e 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
return 1;
if (!bo->tile_flags) {
+ MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
OUT_RELOCh(chan, bo, ps->offset, flags);
OUT_RELOCl(chan, bo, ps->offset, flags);
} else {
+ MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
@@ -174,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
if (ret)
return;
- BEGIN_RING(chan, eng2d, 0x0580, 3);
- OUT_RING (chan, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3);
+ OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES);
OUT_RING (chan, format);
OUT_RING (chan, value);
- BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
+ BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4);
OUT_RING (chan, destx);
OUT_RING (chan, desty);
OUT_RING (chan, width);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 72d33150af1..bef548b7286 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -25,162 +25,207 @@
#include "nouveau/nouveau_stateobj.h"
+#include "util/u_format.h"
+
+#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \
+{ \
+ PIPE_FORMAT_##pf, \
+ NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
+ NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
+ NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
+ NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
+ NV50TIC_0_0_FMT_##f \
+}
+
+#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
+
+struct nv50_texture_format {
+ enum pipe_format pf;
+ uint32_t hw;
+};
+
+#define NV50_TEX_FORMAT_LIST_SIZE \
+ (sizeof(nv50_tex_format_list) / sizeof(struct nv50_texture_format))
+
+static const struct nv50_texture_format nv50_tex_format_list[] =
+{
+ _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8),
+ _(A8R8G8B8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8),
+ _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
+ _(X8R8G8B8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8),
+ _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5),
+ _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4),
+
+ _(R5G6B5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
+
+ _(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
+ _(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
+ _(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
+
+ _(A8L8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
+
+ _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
+ _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
+ _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
+ _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
+
+ _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+ _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
+
+ _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
+ _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
+ _(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32),
+
+ _(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16),
+ _(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
+
+ _MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
+
+};
+
+#undef _
+#undef _MIXED
+
static int
nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
- struct nv50_miptree *mt, int unit)
+ struct nv50_miptree *mt, int unit, unsigned p)
{
- switch (mt->base.base.format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8_8_8_8);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8_8_8_8);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_1_5_5_5);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_4_4_4_4);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_5_6_5);
- break;
- case PIPE_FORMAT_L8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8);
- break;
- case PIPE_FORMAT_A8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8);
- break;
- case PIPE_FORMAT_I8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8);
- break;
- case PIPE_FORMAT_A8L8_UNORM:
- so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_8_8);
- break;
- case PIPE_FORMAT_DXT1_RGB:
- so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_DXT1);
+ unsigned i;
+ uint32_t mode;
+ const struct util_format_description *desc;
+
+ for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++)
+ if (nv50_tex_format_list[i].pf == mt->base.base.format)
+ break;
+ if (i == NV50_TEX_FORMAT_LIST_SIZE)
+ return 1;
+
+ if (nv50->sampler[p][unit]->normalized)
+ mode = 0x50001000 | (1 << 31);
+ else {
+ mode = 0x50001000 | (7 << 14);
+ assert(mt->base.base.target == PIPE_TEXTURE_2D);
+ }
+
+ mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
+ ((mt->base.bo->tile_mode & 0xf0) << 21);
+
+ desc = util_format_description(mt->base.base.format);
+ assert(desc);
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ mode |= 0x0400;
+
+ switch (mt->base.base.target) {
+ case PIPE_TEXTURE_1D:
break;
- case PIPE_FORMAT_DXT1_RGBA:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_DXT1);
+ case PIPE_TEXTURE_2D:
+ mode |= (1 << 14);
break;
- case PIPE_FORMAT_DXT3_RGBA:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_DXT3);
+ case PIPE_TEXTURE_3D:
+ mode |= (2 << 14);
break;
- case PIPE_FORMAT_DXT5_RGBA:
- so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
- NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
- NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
- NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
- NV50TIC_0_0_FMT_DXT5);
+ case PIPE_TEXTURE_CUBE:
+ mode |= (3 << 14);
break;
default:
- return 1;
+ assert(!"unsupported texture target");
+ break;
}
+ so_data (so, nv50_tex_format_list[i].hw);
so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
- NOUVEAU_BO_RD, 0, 0);
- if (nv50->sampler[unit]->normalized)
- so_data (so, 0xd0005000 | mt->base.bo->tile_mode << 22);
- else
- so_data (so, 0x5001d000 | mt->base.bo->tile_mode << 22);
+ NOUVEAU_BO_RD, 0, 0);
+ so_data (so, mode);
so_data (so, 0x00300000);
- so_data (so, mt->base.base.width[0]);
+ so_data (so, mt->base.base.width0 | (1 << 31));
so_data (so, (mt->base.base.last_level << 28) |
- (mt->base.base.depth[0] << 16) | mt->base.base.height[0]);
+ (mt->base.base.depth0 << 16) | mt->base.base.height0);
so_data (so, 0x03000000);
so_data (so, mt->base.base.last_level << 4);
return 0;
}
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+ unsigned p)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
- int unit, push;
+ static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
- push = nv50->miptree_nr * 9 + 2;
- push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2;
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned unit, j, p_hw = p_remap[p];
- so = so_new(push, nv50->miptree_nr * 2);
- so_method(so, tesla, NV50TCL_CB_ADDR, 1);
- so_data (so, NV50_CB_TIC);
- for (unit = 0; unit < nv50->miptree_nr; unit++) {
- struct nv50_miptree *mt = nv50->miptree[unit];
+ nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
+ p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
- if (!mt)
- continue;
+ for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+ struct nv50_miptree *mt = nv50->miptree[p][unit];
- so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
- if (nv50_tex_construct(nv50, so, mt, unit)) {
- NOUVEAU_ERR("failed tex validate\n");
- so_ref(NULL, &so);
- return;
+ so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
+ if (mt) {
+ if (nv50_tex_construct(nv50, so, mt, unit, p))
+ return FALSE;
+ /* Set TEX insn $t src binding $unit in program type p
+ * to TIC, TSC entry (32 * p + unit), mark valid (1).
+ */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+ } else {
+ for (j = 0; j < 8; ++j)
+ so_data(so, 0);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
}
+ }
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
- NV50TCL_SET_SAMPLER_TEX_VALID);
+ for (; unit < nv50->state.miptree_nr[p]; unit++) {
+ /* Make other bindings invalid. */
+ so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_data (so, (unit << 1) | 0);
}
- for (; unit < nv50->state.miptree_nr; unit++) {
- so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
- so_data (so,
- (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+ nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+ return TRUE;
+}
+
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned p, start, push, nrlc;
+
+ for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+ start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+ push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+ nrlc += nv50->miptree_nr[p];
}
+ start = start * 2 + 4 * PIPE_SHADER_TYPES + 2;
+ push = push * 9 + 19 * PIPE_SHADER_TYPES + 2;
+ nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+ so = so_new(start, push, nrlc);
+
+ if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+ nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+ so_ref(NULL, &so);
+
+ NOUVEAU_ERR("failed tex validate\n");
+ return;
+ }
+
+ /* not sure if the following really do what I think: */
+ so_method(so, tesla, 0x1330, 1); /* flush TIC */
+ so_data (so, 0);
+ so_method(so, tesla, 0x1338, 1); /* flush texture caches */
+ so_data (so, 0x20);
so_ref(so, &nv50->state.tic_upload);
so_ref(NULL, &so);
- nv50->state.miptree_nr = nv50->miptree_nr;
}
-
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index 207fb039f70..b870302019a 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -38,18 +38,26 @@
#define NV50TIC_0_0_TYPEA_MASK 0x00038000
#define NV50TIC_0_0_TYPEA_UNORM 0x00010000
#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
+#define NV50TIC_0_0_TYPEA_SINT 0x00018000
+#define NV50TIC_0_0_TYPEA_UINT 0x00020000
#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
#define NV50TIC_0_0_TYPEB_MASK 0x00007000
#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
+#define NV50TIC_0_0_TYPEB_SINT 0x00003000
+#define NV50TIC_0_0_TYPEB_UINT 0x00004000
#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
+#define NV50TIC_0_0_TYPEG_SINT 0x00000600
+#define NV50TIC_0_0_TYPEG_UINT 0x00000800
#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
#define NV50TIC_0_0_TYPER_MASK 0x000001c0
#define NV50TIC_0_0_TYPER_UNORM 0x00000080
#define NV50TIC_0_0_TYPER_SNORM 0x00000040
+#define NV50TIC_0_0_TYPER_SINT 0x000000c0
+#define NV50TIC_0_0_TYPER_UINT 0x00000100
#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
#define NV50TIC_0_0_FMT_MASK 0x0000003f
#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
@@ -57,6 +65,7 @@
#define NV50TIC_0_0_FMT_32_32 0x00000004
#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008
#define NV50TIC_0_0_FMT_2_10_10_10 0x00000009
+#define NV50TIC_0_0_FMT_16_16 0x0000000c
#define NV50TIC_0_0_FMT_32 0x0000000f
#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012
/* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */
@@ -65,12 +74,17 @@
#define NV50TIC_0_0_FMT_8_8 0x00000018
#define NV50TIC_0_0_FMT_16 0x0000001b
#define NV50TIC_0_0_FMT_8 0x0000001d
+#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020
#define NV50TIC_0_0_FMT_10_11_11 0x00000021
#define NV50TIC_0_0_FMT_DXT1 0x00000024
#define NV50TIC_0_0_FMT_DXT3 0x00000025
#define NV50TIC_0_0_FMT_DXT5 0x00000026
#define NV50TIC_0_0_FMT_RGTC1 0x00000027
#define NV50TIC_0_0_FMT_RGTC2 0x00000028
+#define NV50TIC_0_0_FMT_24_8 0x00000029
+#define NV50TIC_0_0_FMT_8_24 0x0000002a
+#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
+#define NV50TIC_0_0_FMT_32_8 0x00000030
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
@@ -133,6 +147,8 @@
#define NV50TSC_1_1_MIPF_NEAREST 0x00000080
#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0
#define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000
+#define NV50TSC_1_1_UNKN_ANISO_15 0x10000000
+#define NV50TSC_1_1_UNKN_ANISO_35 0x18000000
#define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00
#define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 9c289026bbb..a2f1db2914c 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -1,6 +1,8 @@
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
#include "nv50_context.h"
@@ -12,18 +14,22 @@ struct nv50_transfer {
int level_pitch;
int level_width;
int level_height;
+ int level_depth;
int level_x;
int level_y;
+ int level_z;
+ unsigned nblocksx;
+ unsigned nblocksy;
};
static void
nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
struct nouveau_bo *src_bo, unsigned src_offset,
int src_pitch, unsigned src_tile_mode,
- int sx, int sy, int sw, int sh,
+ int sx, int sy, int sz, int sw, int sh, int sd,
struct nouveau_bo *dst_bo, unsigned dst_offset,
int dst_pitch, unsigned dst_tile_mode,
- int dx, int dy, int dw, int dh,
+ int dx, int dy, int dz, int dw, int dh, int dd,
int cpp, int width, int height,
unsigned src_reloc, unsigned dst_reloc)
{
@@ -41,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
OUT_RING (chan, src_pitch);
src_offset += (sy * src_pitch) + (sx * cpp);
} else {
@@ -51,8 +57,8 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, src_tile_mode << 4);
OUT_RING (chan, sw * cpp);
OUT_RING (chan, sh);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
+ OUT_RING (chan, sd);
+ OUT_RING (chan, sz); /* copying only 1 zslice per call */
}
if (!dst_bo->tile_flags) {
@@ -60,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
OUT_RING (chan, 1);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
+ NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
OUT_RING (chan, dst_pitch);
dst_offset += (dy * dst_pitch) + (dx * cpp);
} else {
@@ -70,20 +76,20 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
OUT_RING (chan, dst_tile_mode << 4);
OUT_RING (chan, dw * cpp);
OUT_RING (chan, dh);
- OUT_RING (chan, 1);
- OUT_RING (chan, 0);
+ OUT_RING (chan, dd);
+ OUT_RING (chan, dz); /* copying only 1 zslice per call */
}
while (height) {
int line_count = height > 2047 ? 2047 : height;
- WAIT_RING (chan, 15);
+ MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
BEGIN_RING(chan, m2mf,
NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
+ NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
if (src_bo->tile_flags) {
@@ -101,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
dst_offset += (line_count * dst_pitch);
}
BEGIN_RING(chan, m2mf,
- NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
+ NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
OUT_RING (chan, width * cpp);
OUT_RING (chan, line_count);
OUT_RING (chan, 0x00000101);
@@ -124,52 +130,54 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
struct nv50_transfer *tx;
- unsigned image = 0;
+ unsigned nx, ny, image = 0;
int ret;
if (pt->target == PIPE_TEXTURE_CUBE)
image = face;
- else
- if (pt->target == PIPE_TEXTURE_3D)
- image = zslice;
tx = CALLOC_STRUCT(nv50_transfer);
if (!tx)
return NULL;
pipe_texture_reference(&tx->base.texture, pt);
- tx->base.format = pt->format;
+ tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level));
+ tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level));
tx->base.width = w;
tx->base.height = h;
- tx->base.block = pt->block;
- tx->base.nblocksx = pt->nblocksx[level];
- tx->base.nblocksy = pt->nblocksy[level];
- tx->base.stride = (w * pt->block.size);
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format);
tx->base.usage = usage;
tx->level_pitch = lvl->pitch;
- tx->level_width = mt->base.base.width[level];
- tx->level_height = mt->base.base.height[level];
+ tx->level_width = u_minify(mt->base.base.width0, level);
+ tx->level_height = u_minify(mt->base.base.height0, level);
+ tx->level_depth = u_minify(mt->base.base.depth0, level);
tx->level_offset = lvl->image_offset[image];
tx->level_tiling = lvl->tile_mode;
- tx->level_x = x;
- tx->level_y = y;
+ tx->level_z = zslice;
+ tx->level_x = util_format_get_nblocksx(pt->format, x);
+ tx->level_y = util_format_get_nblocksy(pt->format, y);
ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
- w * pt->block.size * h, &tx->bo);
+ tx->nblocksy * tx->base.stride, &tx->bo);
if (ret) {
FREE(tx);
return NULL;
}
if (usage & PIPE_TRANSFER_READ) {
+ nx = util_format_get_nblocksx(pt->format, tx->base.width);
+ ny = util_format_get_nblocksy(pt->format, tx->base.height);
+
nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- x, y,
- tx->level_width, tx->level_height,
- tx->bo, 0, tx->base.stride,
- tx->bo->tile_mode, 0, 0,
- tx->base.width, tx->base.height,
- tx->base.block.size, w, h,
+ x, y, zslice,
+ tx->nblocksx, tx->nblocksy,
+ tx->level_depth,
+ tx->bo, 0,
+ tx->base.stride, tx->bo->tile_mode,
+ 0, 0, 0,
+ tx->nblocksx, tx->nblocksy, 1,
+ util_format_get_blocksize(pt->format), nx, ny,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
NOUVEAU_BO_GART);
}
@@ -182,18 +190,24 @@ nv50_transfer_del(struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
struct nv50_miptree *mt = nv50_miptree(ptx->texture);
+ struct pipe_texture *pt = ptx->texture;
+
+ unsigned nx = util_format_get_nblocksx(pt->format, tx->base.width);
+ unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height);
if (ptx->usage & PIPE_TRANSFER_WRITE) {
- struct pipe_screen *pscreen = ptx->texture->screen;
- nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride,
- tx->bo->tile_mode, 0, 0,
- tx->base.width, tx->base.height,
+ struct pipe_screen *pscreen = pt->screen;
+
+ nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
+ tx->base.stride, tx->bo->tile_mode,
+ 0, 0, 0,
+ tx->nblocksx, tx->nblocksy, 1,
mt->base.bo, tx->level_offset,
tx->level_pitch, tx->level_tiling,
- tx->level_x, tx->level_y,
- tx->level_width, tx->level_height,
- tx->base.block.size, tx->base.width,
- tx->base.height,
+ tx->level_x, tx->level_y, tx->level_z,
+ tx->nblocksx, tx->nblocksy,
+ tx->level_depth,
+ util_format_get_blocksize(pt->format), nx, ny,
NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
NOUVEAU_BO_GART);
}
@@ -237,3 +251,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
pscreen->transfer_map = nv50_transfer_map;
pscreen->transfer_unmap = nv50_transfer_unmap;
}
+
+void
+nv50_upload_sifc(struct nv50_context *nv50,
+ struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+ unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+ void *src, unsigned src_format, int src_pitch,
+ int x, int y, int w, int h, int cpp)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned line_dwords = (w * cpp + 3) / 4;
+
+ reloc |= NOUVEAU_BO_WR;
+
+ MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
+
+ if (bo->tile_flags) {
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
+ OUT_RING (chan, dst_format);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, bo->tile_mode << 4);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ } else {
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
+ OUT_RING (chan, dst_format);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
+ OUT_RING (chan, dst_pitch);
+ }
+
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4);
+ OUT_RING (chan, dst_w);
+ OUT_RING (chan, dst_h);
+ OUT_RELOCh(chan, bo, dst_offset, reloc);
+ OUT_RELOCl(chan, bo, dst_offset, reloc);
+
+ /* NV50_2D_OPERATION_SRCCOPY assumed already set */
+
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, src_format);
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
+ OUT_RING (chan, w);
+ OUT_RING (chan, h);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, x);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, y);
+
+ while (h--) {
+ const uint32_t *p = src;
+ unsigned count = line_dwords;
+
+ while (count) {
+ unsigned nr = MIN2(count, 1792);
+
+ if (chan->pushbuf->remaining <= nr) {
+ FIRE_RING (chan);
+
+ BEGIN_RING(chan, eng2d,
+ NV50_2D_DST_ADDRESS_HIGH, 2);
+ OUT_RELOCh(chan, bo, dst_offset, reloc);
+ OUT_RELOCl(chan, bo, dst_offset, reloc);
+ }
+ assert(chan->pushbuf->remaining > nr);
+
+ BEGIN_RING(chan, eng2d,
+ NV50_2D_SIFC_DATA | (2 << 29), nr);
+ OUT_RINGp (chan, p, nr);
+
+ p += nr;
+ count -= nr;
+ }
+
+ src += src_pitch;
+ }
+
+ BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
+ OUT_RING (chan, 0);
+}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index eeed148c7b9..f2e510fba61 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -24,8 +24,22 @@
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+
#include "nv50_context.h"
+static boolean
+nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
+
+static boolean
+nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
+
+static boolean
+nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
+
+static boolean
+nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
+
static INLINE unsigned
nv50_prim(unsigned mode)
{
@@ -50,18 +64,25 @@ nv50_prim(unsigned mode)
}
static INLINE uint32_t
-nv50_vbo_type_to_hw(unsigned type)
+nv50_vbo_type_to_hw(enum pipe_format format)
{
- switch (type) {
- case PIPE_FORMAT_TYPE_FLOAT:
+ const struct util_format_description *desc;
+
+ desc = util_format_description(format);
+ assert(desc);
+
+ switch (desc->channel[0].type) {
+ case UTIL_FORMAT_TYPE_FLOAT:
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
- case PIPE_FORMAT_TYPE_UNORM:
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
- case PIPE_FORMAT_TYPE_SNORM:
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
- case PIPE_FORMAT_TYPE_USCALED:
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[0].normalized) {
+ return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
+ }
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
- case PIPE_FORMAT_TYPE_SSCALED:
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[0].normalized) {
+ return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
+ }
return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
/*
case PIPE_FORMAT_TYPE_UINT:
@@ -78,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
{
static const uint32_t hw_values[] = {
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 };
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
/* we'd also have R11G11B10 and R10G10B10A2 */
@@ -108,9 +129,15 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
{
uint32_t hw_type, hw_size;
enum pipe_format pf = ve->src_format;
- unsigned size = pf_size_x(pf) << pf_exp2(pf);
+ const struct util_format_description *desc;
+ unsigned size;
+
+ desc = util_format_description(pf);
+ assert(desc);
- hw_type = nv50_vbo_type_to_hw(pf_type(pf));
+ size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+
+ hw_type = nv50_vbo_type_to_hw(pf);
hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
if (!hw_type || !hw_size) {
@@ -119,19 +146,20 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return 0x24e80000;
}
- if (pf_swizzle_x(pf) == 2) /* BGRA */
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
hw_size |= (1 << 31); /* no real swizzle bits :-( */
return (hw_type | hw_size);
}
-boolean
+void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
+ boolean ret;
nv50_state_validate(nv50);
@@ -139,24 +167,27 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, 0x142c, 1);
OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x1440, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x1334, 1);
- OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(mode));
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
+
+ if (nv50->vbo_fifo)
+ ret = nv50_push_arrays(nv50, start, count);
+ else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+ OUT_RING (chan, start);
+ OUT_RING (chan, count);
+ ret = TRUE;
+ }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- pipe->flush(pipe, 0, NULL);
- return TRUE;
+ /* XXX: not sure what to do if ret != TRUE: flush and retry?
+ */
+ assert(ret);
}
-static INLINE void
+static INLINE boolean
nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned start, unsigned count)
{
@@ -165,8 +196,11 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
map += start;
+ if (nv50->vbo_fifo)
+ return nv50_push_elements_u08(nv50, map, count);
+
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -176,16 +210,17 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
count -= nr;
map += nr;
}
+ return TRUE;
}
-static INLINE void
+static INLINE boolean
nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned start, unsigned count)
{
@@ -194,8 +229,11 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
map += start;
+ if (nv50->vbo_fifo)
+ return nv50_push_elements_u16(nv50, map, count);
+
if (count & 1) {
- BEGIN_RING(chan, tesla, 0x15e8, 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
count--;
@@ -205,16 +243,17 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
unsigned nr = count > 2046 ? 2046 : count;
int i;
- BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
for (i = 0; i < nr; i += 2)
OUT_RING (chan, (map[i + 1] << 16) | map[i]);
count -= nr;
map += nr;
}
+ return TRUE;
}
-static INLINE void
+static INLINE boolean
nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
unsigned start, unsigned count)
{
@@ -223,18 +262,22 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
map += start;
+ if (nv50->vbo_fifo)
+ return nv50_push_elements_u32(nv50, map, count);
+
while (count) {
unsigned nr = count > 2047 ? 2047 : count;
- BEGIN_RING(chan, tesla, 0x400015e8, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
OUT_RINGp (chan, map, nr);
count -= nr;
map += nr;
}
+ return TRUE;
}
-boolean
+void
nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
@@ -244,6 +287,7 @@ nv50_draw_elements(struct pipe_context *pipe,
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct pipe_screen *pscreen = pipe->screen;
void *map;
+ boolean ret;
map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
@@ -258,23 +302,27 @@ nv50_draw_elements(struct pipe_context *pipe,
OUT_RING (chan, nv50_prim(mode));
switch (indexSize) {
case 1:
- nv50_draw_elements_inline_u08(nv50, map, start, count);
+ ret = nv50_draw_elements_inline_u08(nv50, map, start, count);
break;
case 2:
- nv50_draw_elements_inline_u16(nv50, map, start, count);
+ ret = nv50_draw_elements_inline_u16(nv50, map, start, count);
break;
case 4:
- nv50_draw_elements_inline_u32(nv50, map, start, count);
+ ret = nv50_draw_elements_inline_u32(nv50, map, start, count);
break;
default:
assert(0);
+ ret = FALSE;
+ break;
}
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
pipe_buffer_unmap(pscreen, indexBuffer);
- pipe->flush(pipe, 0, NULL);
- return TRUE;
+
+ /* XXX: what to do if ret != TRUE? Flush and retry?
+ */
+ assert(ret);
}
static INLINE boolean
@@ -290,9 +338,13 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
float *v;
int ret;
enum pipe_format pf = ve->src_format;
+ const struct util_format_description *desc;
- if ((pf_type(pf) != PIPE_FORMAT_TYPE_FLOAT) ||
- (pf_size_x(pf) << pf_exp2(pf)) != 32)
+ desc = util_format_description(pf);
+ assert(desc);
+
+ if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) ||
+ util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32)
return FALSE;
ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
@@ -302,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so = *pso;
if (!so)
- *pso = so = so_new(nv50->vtxelt_nr * 5, 0);
+ *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
switch (ve->nr_components) {
case 4:
@@ -324,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so_data (so, fui(v[1]));
break;
case 1:
+ if (attrib == nv50->vertprog->cfg.edgeflag_in) {
+ so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ so_data (so, v[0] ? 1 : 0);
+ }
so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
so_data (so, fui(v[0]));
break;
@@ -341,17 +397,27 @@ nv50_vbo_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
- unsigned i;
+ unsigned i, n_ve;
/* don't validate if Gallium took away our buffers */
if (nv50->vtxbuf_nr == 0)
return;
+ nv50->vbo_fifo = 0;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ if (nv50->vtxbuf[i].stride &&
+ !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
+ nv50->vbo_fifo = 0xffff;
+
+ if (nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
+
+ n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
vtxattr = NULL;
- vtxbuf = so_new(nv50->vtxelt_nr * 7, nv50->vtxelt_nr * 4);
- vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
- so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0),
- nv50->vtxelt_nr);
+ vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+ vtxfmt = so_new(1, n_ve, 0);
+ so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
for (i = 0; i < nv50->vtxelt_nr; i++) {
struct pipe_vertex_element *ve = &nv50->vtxelt[i];
@@ -367,10 +433,19 @@ nv50_vbo_validate(struct nv50_context *nv50)
so_method(vtxbuf, tesla,
NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
+
+ nv50->vbo_fifo &= ~(1 << i);
continue;
}
so_data(vtxfmt, hw | i);
+ if (nv50->vbo_fifo) {
+ so_method(vtxbuf, tesla,
+ NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+ so_data (vtxbuf, 0);
+ continue;
+ }
+
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
so_data (vtxbuf, 0x20000000 | vb->stride);
so_reloc (vtxbuf, bo, vb->buffer_offset +
@@ -381,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
/* vertex array limits */
- so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2);
+ so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
so_reloc (vtxbuf, bo, vb->buffer->size - 1,
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_HIGH, 0, 0);
@@ -389,6 +464,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
NOUVEAU_BO_LOW, 0, 0);
}
+ for (; i < n_ve; ++i) {
+ so_data (vtxfmt, 0x7e080010);
+
+ so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+ so_data (vtxbuf, 0);
+ }
+ nv50->state.vtxelt_nr = nv50->vtxelt_nr;
so_ref (vtxfmt, &nv50->state.vtxfmt);
so_ref (vtxbuf, &nv50->state.vtxbuf);
@@ -398,3 +480,359 @@ nv50_vbo_validate(struct nv50_context *nv50)
so_ref (NULL, &vtxattr);
}
+typedef void (*pfn_push)(struct nouveau_channel *, void *);
+
+struct nv50_vbo_emitctx
+{
+ pfn_push push[16];
+ void *map[16];
+ unsigned stride[16];
+ unsigned nr_ve;
+ unsigned vtx_dwords;
+ unsigned vtx_max;
+
+ float edgeflag;
+ unsigned ve_edgeflag;
+};
+
+static INLINE void
+emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
+{
+ unsigned i;
+
+ for (i = 0; i < emit->nr_ve; ++i) {
+ emit->push[i](chan, emit->map[i]);
+ emit->map[i] += emit->stride[i];
+ }
+}
+
+static INLINE void
+emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
+ uint32_t vi)
+{
+ unsigned i;
+
+ for (i = 0; i < emit->nr_ve; ++i)
+ emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
+}
+
+static INLINE boolean
+nv50_map_vbufs(struct nv50_context *nv50)
+{
+ int i;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i) {
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+ unsigned size, delta;
+
+ if (nouveau_bo(vb->buffer)->map)
+ continue;
+
+ size = vb->stride * (vb->max_index + 1);
+ delta = vb->buffer_offset;
+
+ if (!size)
+ size = vb->buffer->size - vb->buffer_offset;
+
+ if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
+ delta, size, NOUVEAU_BO_RD))
+ break;
+ }
+
+ if (i == nv50->vtxbuf_nr)
+ return TRUE;
+ for (; i >= 0; --i)
+ nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+ return FALSE;
+}
+
+static INLINE void
+nv50_unmap_vbufs(struct nv50_context *nv50)
+{
+ unsigned i;
+
+ for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
+ nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+}
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+ uint32_t *v = data;
+
+ OUT_RING(chan, v[0]);
+ OUT_RING(chan, v[1]);
+ OUT_RING(chan, v[2]);
+ OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+ uint16_t *v = data;
+
+ OUT_RING(chan, (v[1] << 16) | v[0]);
+ OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+ uint8_t *v = data;
+
+ OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+static boolean
+emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
+ unsigned start)
+{
+ unsigned i;
+
+ if (nv50_map_vbufs(nv50) == FALSE)
+ return FALSE;
+
+ emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
+
+ emit->edgeflag = 0.5f;
+ emit->nr_ve = 0;
+ emit->vtx_dwords = 0;
+
+ for (i = 0; i < nv50->vtxelt_nr; ++i) {
+ struct pipe_vertex_element *ve;
+ struct pipe_vertex_buffer *vb;
+ unsigned n, size;
+ const struct util_format_description *desc;
+
+ ve = &nv50->vtxelt[i];
+ vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+ if (!(nv50->vbo_fifo & (1 << i)))
+ continue;
+ n = emit->nr_ve++;
+
+ emit->stride[n] = vb->stride;
+ emit->map[n] = nouveau_bo(vb->buffer)->map +
+ (start * vb->stride + ve->src_offset);
+
+ desc = util_format_description(ve->src_format);
+ assert(desc);
+
+ size = util_format_get_component_bits(
+ ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+
+ assert(ve->nr_components > 0 && ve->nr_components <= 4);
+
+ /* It shouldn't be necessary to push the implicit 1s
+ * for case 3 and size 8 cases 1, 2, 3.
+ */
+ switch (size) {
+ default:
+ NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
+ return FALSE;
+ case 32:
+ switch (ve->nr_components) {
+ case 1: emit->push[n] = emit_b32_1; break;
+ case 2: emit->push[n] = emit_b32_2; break;
+ case 3: emit->push[n] = emit_b32_3; break;
+ case 4: emit->push[n] = emit_b32_4; break;
+ }
+ emit->vtx_dwords += ve->nr_components;
+ break;
+ case 16:
+ switch (ve->nr_components) {
+ case 1: emit->push[n] = emit_b16_1; break;
+ case 2: emit->push[n] = emit_b32_1; break;
+ case 3: emit->push[n] = emit_b16_3; break;
+ case 4: emit->push[n] = emit_b32_2; break;
+ }
+ emit->vtx_dwords += (ve->nr_components + 1) >> 1;
+ break;
+ case 8:
+ switch (ve->nr_components) {
+ case 1: emit->push[n] = emit_b08_1; break;
+ case 2: emit->push[n] = emit_b16_1; break;
+ case 3: emit->push[n] = emit_b08_3; break;
+ case 4: emit->push[n] = emit_b32_1; break;
+ }
+ emit->vtx_dwords += 1;
+ break;
+ }
+ }
+
+ emit->vtx_max = 512 / emit->vtx_dwords;
+ if (emit->ve_edgeflag < 16)
+ emit->vtx_max = 1;
+
+ return TRUE;
+}
+
+static INLINE void
+set_edgeflag(struct nouveau_channel *chan,
+ struct nouveau_grobj *tesla,
+ struct nv50_vbo_emitctx *emit, uint32_t index)
+{
+ unsigned i = emit->ve_edgeflag;
+
+ if (i < 16) {
+ float f = *((float *)(emit->map[i] + index * emit->stride[i]));
+
+ if (emit->edgeflag != f) {
+ emit->edgeflag = f;
+
+ BEGIN_RING(chan, tesla, 0x15e4, 1);
+ OUT_RING (chan, f ? 1 : 0);
+ }
+ }
+}
+
+static boolean
+nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_vbo_emitctx emit;
+
+ if (emit_prepare(nv50, &emit, start) == FALSE)
+ return FALSE;
+
+ while (count) {
+ unsigned i, dw, nr = MIN2(count, emit.vtx_max);
+ dw = nr * emit.vtx_dwords;
+
+ set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ for (i = 0; i < nr; ++i)
+ emit_vtx_next(chan, &emit);
+
+ count -= nr;
+ }
+ nv50_unmap_vbufs(nv50);
+
+ return TRUE;
+}
+
+static boolean
+nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_vbo_emitctx emit;
+
+ if (emit_prepare(nv50, &emit, 0) == FALSE)
+ return FALSE;
+
+ while (count) {
+ unsigned i, dw, nr = MIN2(count, emit.vtx_max);
+ dw = nr * emit.vtx_dwords;
+
+ set_edgeflag(chan, tesla, &emit, *map);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ for (i = 0; i < nr; ++i)
+ emit_vtx(chan, &emit, *map++);
+
+ count -= nr;
+ }
+ nv50_unmap_vbufs(nv50);
+
+ return TRUE;
+}
+
+static boolean
+nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_vbo_emitctx emit;
+
+ if (emit_prepare(nv50, &emit, 0) == FALSE)
+ return FALSE;
+
+ while (count) {
+ unsigned i, dw, nr = MIN2(count, emit.vtx_max);
+ dw = nr * emit.vtx_dwords;
+
+ set_edgeflag(chan, tesla, &emit, *map);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ for (i = 0; i < nr; ++i)
+ emit_vtx(chan, &emit, *map++);
+
+ count -= nr;
+ }
+ nv50_unmap_vbufs(nv50);
+
+ return TRUE;
+}
+
+static boolean
+nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_vbo_emitctx emit;
+
+ if (emit_prepare(nv50, &emit, 0) == FALSE)
+ return FALSE;
+
+ while (count) {
+ unsigned i, dw, nr = MIN2(count, emit.vtx_max);
+ dw = nr * emit.vtx_dwords;
+
+ set_edgeflag(chan, tesla, &emit, *map);
+
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
+ for (i = 0; i < nr; ++i)
+ emit_vtx(chan, &emit, *map++);
+
+ count -= nr;
+ }
+ nv50_unmap_vbufs(nv50);
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 69e4724790a..afddcb161fa 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -4,10 +4,8 @@ include $(TOP)/configs/current
LIBNAME = r300
C_SOURCES = \
- r3xx_fs.c \
- r5xx_fs.c \
+ r300_blit.c \
r300_chipset.c \
- r300_clear.c \
r300_context.c \
r300_debug.c \
r300_emit.c \
@@ -20,12 +18,12 @@ C_SOURCES = \
r300_state_derived.c \
r300_state_invariant.c \
r300_vs.c \
- r300_surface.c \
r300_texture.c \
r300_tgsi_to_rc.c
LIBRARY_INCLUDES = \
- -I$(TOP)/src/mesa/drivers/dri/r300/compiler
+ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \
+ -I$(TOP)/src/gallium/winsys/drm/radeon/core
COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
@@ -37,4 +35,4 @@ include ../../Makefile.template
.PHONY : $(COMPILER_ARCHIVE)
$(COMPILER_ARCHIVE):
- cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make
+ $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index b4c8ba2015a..183aa17f9b3 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -4,15 +4,18 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
env = env.Clone()
# add the paths for r300compiler
-env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa'])
+env.Append(CPPPATH = [
+ '#/src/mesa/drivers/dri/r300/compiler',
+ '#/src/gallium/winsys/drm/radeon/core',
+ '#/include',
+ '#/src/mesa',
+])
r300 = env.ConvenienceLibrary(
target = 'r300',
source = [
- 'r3xx_fs.c',
- 'r5xx_fs.c',
+ 'r300_blit.c',
'r300_chipset.c',
- 'r300_clear.c',
'r300_context.c',
'r300_debug.c',
'r300_emit.c',
@@ -25,7 +28,6 @@ r300 = env.ConvenienceLibrary(
'r300_state_derived.c',
'r300_state_invariant.c',
'r300_vs.c',
- 'r300_surface.c',
'r300_texture.c',
'r300_tgsi_to_rc.c',
] + r300compiler) + r300compiler
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
new file mode 100644
index 00000000000..ffe066d5369
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_blit.h"
+#include "r300_context.h"
+
+#include "util/u_rect.h"
+
+static void r300_blitter_save_states(struct r300_context* r300)
+{
+ util_blitter_save_blend(r300->blitter, r300->blend_state);
+ util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state);
+ util_blitter_save_rasterizer(r300->blitter, r300->rs_state);
+ util_blitter_save_fragment_shader(r300->blitter, r300->fs);
+ util_blitter_save_vertex_shader(r300->blitter, r300->vs);
+}
+
+/* Clear currently bound buffers. */
+void r300_clear(struct pipe_context* pipe,
+ unsigned buffers,
+ const float* rgba,
+ double depth,
+ unsigned stencil)
+{
+ /* XXX Implement fastfill.
+ *
+ * If fastfill is enabled, a few facts should be considered:
+ *
+ * 1) Zbuffer must be micro-tiled and whole microtiles must be
+ * written.
+ *
+ * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
+ * equal to 0.
+ *
+ * 3) RB3D_COLOR_CLEAR_VALUE is used to clear a colorbuffer and
+ * RB3D_COLOR_CHANNEL_MASK must be equal to 0.
+ *
+ * 4) ZB_CB_CLEAR can be used to make the ZB units help in clearing
+ * the colorbuffer. The color clear value is supplied through both
+ * RB3D_COLOR_CLEAR_VALUE and ZB_DEPTHCLEARVALUE, and the colorbuffer
+ * must be set in ZB_DEPTHOFFSET and ZB_DEPTHPITCH in addition to
+ * RB3D_COLOROFFSET and RB3D_COLORPITCH. It's obvious that the zbuffer
+ * will not be cleared and multiple render targets cannot be cleared
+ * this way either.
+ *
+ * 5) For 16-bit integer buffering, compression causes a hung with one or
+ * two samples and should not be used.
+ *
+ * 6) Fastfill must not be used if reading of compressed Z data is disabled
+ * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
+ * i.e. it cannot be used to compress the zbuffer.
+ * (what the hell does that mean and how does it fit in clearing
+ * the buffers?)
+ *
+ * - Marek
+ */
+
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300_blitter_save_states(r300);
+
+ util_blitter_clear(r300->blitter,
+ r300->framebuffer_state.width,
+ r300->framebuffer_state.height,
+ r300->framebuffer_state.nr_cbufs,
+ buffers, rgba, depth, stencil);
+}
+
+/* Copy a block of pixels from one surface to another. */
+void r300_surface_copy(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface* src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ /* Yeah we have to save all those states to ensure this blitter operation
+ * is really transparent. The states will be restored by the blitter once
+ * copying is done. */
+ r300_blitter_save_states(r300);
+ util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+ util_blitter_save_fragment_sampler_states(
+ r300->blitter, r300->sampler_count, (void**)r300->sampler_states);
+
+ util_blitter_save_fragment_sampler_textures(
+ r300->blitter, r300->texture_count,
+ (struct pipe_texture**)r300->textures);
+
+ /* Do a copy */
+ util_blitter_copy(r300->blitter,
+ dst, dstx, dsty, src, srcx, srcy, width, height, TRUE);
+}
+
+/* Fill a region of a surface with a constant value. */
+void r300_surface_fill(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300_blitter_save_states(r300);
+ util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+ util_blitter_fill(r300->blitter,
+ dst, dstx, dsty, width, height, value);
+}
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_blit.h
index cd5900565e8..029e4f98e7d 100644
--- a/src/gallium/drivers/r300/r300_clear.h
+++ b/src/gallium/drivers/r300/r300_blit.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2008 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,12 +20,11 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#ifndef R300_CLEAR_H
-#define R300_CLEAR_H
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
-#include "util/u_clear.h"
-
-#include "r300_context.h"
+struct pipe_context;
+struct pipe_surface;
void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@@ -33,4 +32,17 @@ void r300_clear(struct pipe_context* pipe,
double depth,
unsigned stencil);
-#endif /* R300_CLEAR_H */
+void r300_surface_copy(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface* src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height);
+
+void r300_surface_fill(struct pipe_context* pipe,
+ struct pipe_surface* dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height,
+ unsigned value);
+
+#endif /* R300_BLIT_H */
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index d138866d33c..51fdb82ff34 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -21,6 +21,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_chipset.h"
+
#include "util/u_debug.h"
/* r300_chipset: A file all to itself for deducing the various properties of
@@ -31,7 +32,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
{
/* Reasonable defaults */
caps->num_vert_fpus = 4;
- caps->has_tcl = getenv("RADEON_NO_TCL") ? FALSE : TRUE;
+ caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
caps->is_r500 = FALSE;
caps->high_second_pipe = FALSE;
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 322d4a57e41..0633a8b8a72 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -33,9 +33,11 @@ struct r300_capabilities {
/* Chipset family */
int family;
/* The number of vertex floating-point units */
- int num_vert_fpus;
+ unsigned num_vert_fpus;
/* The number of fragment pipes */
- int num_frag_pipes;
+ unsigned num_frag_pipes;
+ /* The number of z pipes */
+ unsigned num_z_pipes;
/* Whether or not TCL is physically present */
boolean has_tcl;
/* Whether or not this is an RV515 or newer; R500s have many differences
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
deleted file mode 100644
index 8b9cb819ae6..00000000000
--- a/src/gallium/drivers/r300/r300_clear.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_clear.h"
-
-/* Clears currently bound buffers. */
-void r300_clear(struct pipe_context* pipe,
- unsigned buffers,
- const float* rgba,
- double depth,
- unsigned stencil)
-{
- /* XXX we can and should do one clear if both color and zs are set */
- util_clear(pipe, &r300_context(pipe)->framebuffer_state,
- buffers, rgba, depth, stencil);
-}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 9cc455135db..d5c2d63d393 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -20,178 +20,167 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_context.h"
-
-#include "r300_flush.h"
-#include "r300_state_invariant.h"
-
-static boolean r300_draw_range_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize,
- unsigned minIndex,
- unsigned maxIndex,
- unsigned mode,
- unsigned start,
- unsigned count)
-{
- struct r300_context* r300 = r300_context(pipe);
- int i;
-
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- void* buf = pipe_buffer_map(pipe->screen,
- r300->vertex_buffers[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_vertex_buffer(r300->draw, i, buf);
- }
-
- if (indexBuffer) {
- void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- draw_set_mapped_element_buffer_range(r300->draw, indexSize,
- minIndex, maxIndex, indices);
- } else {
- draw_set_mapped_element_buffer(r300->draw, 0, NULL);
- }
-
- draw_set_mapped_constant_buffer(r300->draw,
- r300->shader_constants[PIPE_SHADER_VERTEX].constants,
- r300->shader_constants[PIPE_SHADER_VERTEX].count *
- (sizeof(float) * 4));
-
- draw_arrays(r300->draw, mode, start, count);
+#include "draw/draw_context.h"
- for (i = 0; i < r300->vertex_buffer_count; i++) {
- pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer);
- draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
- }
+#include "tgsi/tgsi_scan.h"
- if (indexBuffer) {
- pipe_buffer_unmap(pipe->screen, indexBuffer);
- draw_set_mapped_element_buffer_range(r300->draw, 0, start,
- start + count - 1, NULL);
- }
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
- return TRUE;
-}
+#include "r300_blit.h"
+#include "r300_context.h"
+#include "r300_flush.h"
+#include "r300_query.h"
+#include "r300_render.h"
+#include "r300_screen.h"
+#include "r300_state_derived.h"
+#include "r300_state_invariant.h"
+#include "r300_texture.h"
+#include "r300_winsys.h"
-static boolean r300_draw_elements(struct pipe_context* pipe,
- struct pipe_buffer* indexBuffer,
- unsigned indexSize, unsigned mode,
- unsigned start, unsigned count)
+static enum pipe_error r300_clear_hash_table(void* key, void* value,
+ void* data)
{
- return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
- mode, start, count);
+ FREE(key);
+ FREE(value);
+ return PIPE_OK;
}
-static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
- unsigned start, unsigned count)
+static void r300_destroy_context(struct pipe_context* context)
{
- return r300_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
-static void r300_destroy_context(struct pipe_context* context) {
struct r300_context* r300 = r300_context(context);
struct r300_query* query, * temp;
+ util_blitter_destroy(r300->blitter);
+
+ util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
+ NULL);
+ util_hash_table_destroy(r300->shader_hash_table);
+
draw_destroy(r300->draw);
/* Free the OQ BO. */
context->screen->buffer_destroy(r300->oqbo);
/* If there are any queries pending or not destroyed, remove them now. */
- if (r300->query_list) {
- foreach_s(query, temp, r300->query_list) {
- remove_from_list(query);
- FREE(query);
- }
+ foreach_s(query, temp, &r300->query_list) {
+ remove_from_list(query);
+ FREE(query);
}
FREE(r300->blend_color_state);
FREE(r300->rs_block);
FREE(r300->scissor_state);
+ FREE(r300->vertex_info);
FREE(r300->viewport_state);
FREE(r300);
}
static unsigned int
-r300_is_texture_referenced( struct pipe_context *pipe,
- struct pipe_texture *texture,
- unsigned face, unsigned level)
+r300_is_texture_referenced(struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level)
{
- /**
- * FIXME: Optimize.
- */
+ struct pipe_buffer* buf = 0;
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ r300_get_texture_buffer(texture, &buf, NULL);
+
+ return pipe->is_buffer_referenced(pipe, buf);
}
static unsigned int
-r300_is_buffer_referenced( struct pipe_context *pipe,
- struct pipe_buffer *buf)
+r300_is_buffer_referenced(struct pipe_context *pipe,
+ struct pipe_buffer *buf)
+{
+ /* This only checks to see whether actual hardware buffers are
+ * referenced. Since we use managed BOs and transfers, it's actually not
+ * possible for pipe_buffers to ever reference the actual hardware, so
+ * buffers are never referenced. */
+ return 0;
+}
+
+static void r300_flush_cb(void *data)
{
- /**
- * FIXME: Optimize.
- */
+ struct r300_context* const cs_context_copy = data;
- return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
}
struct pipe_context* r300_create_context(struct pipe_screen* screen,
- struct r300_winsys* r300_winsys)
+ struct radeon_winsys* radeon_winsys)
{
struct r300_context* r300 = CALLOC_STRUCT(r300_context);
+ struct r300_screen* r300screen = r300_screen(screen);
if (!r300)
return NULL;
- r300->winsys = r300_winsys;
+ r300->winsys = radeon_winsys;
- r300->context.winsys = (struct pipe_winsys*)r300_winsys;
- r300->context.screen = r300_screen(screen);
+ r300->context.winsys = (struct pipe_winsys*)radeon_winsys;
+ r300->context.screen = screen;
r300_init_debug(r300);
r300->context.destroy = r300_destroy_context;
r300->context.clear = r300_clear;
+ r300->context.surface_copy = r300_surface_copy;
+ r300->context.surface_fill = r300_surface_fill;
- r300->context.draw_arrays = r300_draw_arrays;
- r300->context.draw_elements = r300_draw_elements;
- r300->context.draw_range_elements = r300_draw_range_elements;
+ if (r300screen->caps->has_tcl) {
+ r300->context.draw_arrays = r300_draw_arrays;
+ r300->context.draw_elements = r300_draw_elements;
+ r300->context.draw_range_elements = r300_draw_range_elements;
+ } else {
+ r300->context.draw_arrays = r300_swtcl_draw_arrays;
+ r300->context.draw_elements = r300_draw_elements;
+ r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+
+ /* Create a Draw. This is used for SW TCL. */
+ r300->draw = draw_create();
+ /* Enable our renderer. */
+ draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
+ /* Enable Draw's clipping. */
+ draw_set_driver_clipping(r300->draw, FALSE);
+ /* Force Draw to never do viewport transform, since we can do
+ * transform in hardware, always. */
+ draw_set_viewport_state(r300->draw, &r300_viewport_identity);
+ }
r300->context.is_texture_referenced = r300_is_texture_referenced;
r300->context.is_buffer_referenced = r300_is_buffer_referenced;
+ r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash,
+ r300_shader_key_compare);
+
r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
r300->rs_block = CALLOC_STRUCT(r300_rs_block);
r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
+ r300->vertex_info = CALLOC_STRUCT(r300_vertex_info);
r300->viewport_state = CALLOC_STRUCT(r300_viewport_state);
- /* Create a Draw. This is used for vert collation and SW TCL. */
- r300->draw = draw_create();
- /* Enable our renderer. */
- draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
- /* Disable Draw's clipping if TCL is present. */
- draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl);
- /* Force Draw to never do viewport transform, since (again) we can do
- * transform in hardware, always. */
- draw_set_viewport_state(r300->draw, &r300_viewport_identity);
-
/* Open up the OQ BO. */
r300->oqbo = screen->buffer_create(screen, 4096,
PIPE_BUFFER_USAGE_VERTEX, 4096);
+ make_empty_list(&r300->query_list);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
- r300_init_surface_functions(r300);
+ /* r300_init_surface_functions(r300); */
r300_init_state_functions(r300);
r300_emit_invariant_state(r300);
+
+ r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
r300->dirty_state = R300_NEW_KITCHEN_SINK;
r300->dirty_hw++;
+ r300->blitter = util_blitter_create(&r300->context);
+
return &r300->context;
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 52b1c9a6b26..232530b7dc4 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -23,20 +23,12 @@
#ifndef R300_CONTEXT_H
#define R300_CONTEXT_H
-#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
-#include "pipe/p_context.h"
-
-#include "tgsi/tgsi_scan.h"
+#include "util/u_blitter.h"
-#include "util/u_memory.h"
-#include "util/u_simple_list.h"
-
-#include "r300_clear.h"
-#include "r300_query.h"
-#include "r300_screen.h"
-#include "r300_winsys.h"
+#include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
struct r300_fragment_shader;
struct r300_vertex_shader;
@@ -44,6 +36,7 @@ struct r300_vertex_shader;
struct r300_blend_state {
uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */
uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
+ uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */
uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */
};
@@ -62,7 +55,6 @@ struct r300_dsa_state {
uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */
uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
- uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
};
@@ -88,6 +80,7 @@ struct r300_rs_state {
uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */
+ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
};
struct r300_rs_block {
@@ -98,14 +91,28 @@ struct r300_rs_block {
};
struct r300_sampler_state {
+ struct pipe_sampler_state state;
+
uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+
+ /* Min/max LOD must be clamped to [0, last_level], thus
+ * it's dependent on a currently bound texture */
+ unsigned min_lod, max_lod;
+};
+
+struct r300_scissor_regs {
+ uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */
+ uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+
+ /* Whether everything is culled by scissoring. */
+ boolean empty_area;
};
struct r300_scissor_state {
- uint32_t scissor_top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */
- uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+ struct r300_scissor_regs framebuffer;
+ struct r300_scissor_regs scissor;
};
struct r300_texture_state {
@@ -124,13 +131,17 @@ struct r300_viewport_state {
uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */
};
+struct r300_ztop_state {
+ uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
+};
+
#define R300_NEW_BLEND 0x00000001
#define R300_NEW_BLEND_COLOR 0x00000002
#define R300_NEW_CLIP 0x00000004
-#define R300_NEW_CONSTANTS 0x00000008
-#define R300_NEW_DSA 0x00000010
-#define R300_NEW_FRAMEBUFFERS 0x00000020
-#define R300_NEW_FRAGMENT_SHADER 0x00000040
+#define R300_NEW_DSA 0x00000008
+#define R300_NEW_FRAMEBUFFERS 0x00000010
+#define R300_NEW_FRAGMENT_SHADER 0x00000020
+#define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040
#define R300_NEW_RASTERIZER 0x00000080
#define R300_NEW_RS_BLOCK 0x00000100
#define R300_NEW_SAMPLER 0x00000200
@@ -140,8 +151,10 @@ struct r300_viewport_state {
#define R300_ANY_NEW_TEXTURES 0x03fc0000
#define R300_NEW_VERTEX_FORMAT 0x04000000
#define R300_NEW_VERTEX_SHADER 0x08000000
-#define R300_NEW_VIEWPORT 0x10000000
-#define R300_NEW_KITCHEN_SINK 0x1fffffff
+#define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000
+#define R300_NEW_VIEWPORT 0x20000000
+#define R300_NEW_QUERY 0x40000000
+#define R300_NEW_KITCHEN_SINK 0x7fffffff
/* The next several objects are not pure Radeon state; they inherit from
* various Gallium classes. */
@@ -172,6 +185,10 @@ struct r300_query {
unsigned int count;
/* The offset of this query into the query buffer, in bytes. */
unsigned offset;
+ /* if we've flushed the query */
+ boolean flushed;
+ /* if begin has been emitted */
+ boolean begin_emitted;
/* Linked list members. */
struct r300_query* prev;
struct r300_query* next;
@@ -184,6 +201,12 @@ struct r300_texture {
/* Offsets into the buffer. */
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
+ /* A pitch for each mip-level */
+ unsigned pitch[PIPE_MAX_TEXTURE_LEVELS];
+
+ /* Size of one zslice or face based on the texture target */
+ unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
+
/**
* If non-zero, override the natural texture layout with
* a custom stride (in bytes).
@@ -197,6 +220,11 @@ struct r300_texture {
/* Total size of this texture, in bytes. */
unsigned size;
+ /* Whether this texture has non-power-of-two dimensions.
+ * It can be either a regular texture or a rectangle one.
+ */
+ boolean is_npot;
+
/* Pipe buffer backing this texture. */
struct pipe_buffer* buffer;
@@ -204,18 +232,14 @@ struct r300_texture {
struct r300_texture_state state;
};
-struct r300_vertex_format {
+struct r300_vertex_info {
/* Parent class */
struct vertex_info vinfo;
+
/* R300_VAP_PROG_STREAK_CNTL_[0-7] */
uint32_t vap_prog_stream_cntl[8];
/* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
uint32_t vap_prog_stream_cntl_ext[8];
- /* Map of vertex attributes into PVS memory for HW TCL,
- * or GA memory for SW TCL. */
- int vs_tab[16];
- /* Map of rasterizer attributes from GB through RS to US. */
- int fs_tab[16];
};
extern struct pipe_viewport_state r300_viewport_identity;
@@ -225,9 +249,11 @@ struct r300_context {
struct pipe_context context;
/* The interface to the windowing system, etc. */
- struct r300_winsys* winsys;
+ struct radeon_winsys* winsys;
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
+ /* Accelerated blit support. */
+ struct blitter_context* blitter;
/* Vertex buffer for rendering. */
struct pipe_buffer* vbo;
@@ -237,7 +263,14 @@ struct r300_context {
/* Occlusion query buffer. */
struct pipe_buffer* oqbo;
/* Query list. */
- struct r300_query* query_list;
+ struct r300_query *query_current;
+ struct r300_query query_list;
+
+ /* Shader hash table. Used to store vertex formatting information, which
+ * depends on the combination of both currently loaded shaders. */
+ struct util_hash_table* shader_hash_table;
+ /* Vertex formatting information. */
+ struct r300_vertex_info* vertex_info;
/* Various CSO state objects. */
/* Blend state. */
@@ -266,15 +299,20 @@ struct r300_context {
/* Texture states. */
struct r300_texture* textures[8];
int texture_count;
- /* Vertex buffers for Gallium. */
- struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
- int vertex_buffer_count;
- /* Vertex information. */
- struct r300_vertex_format vertex_info;
/* Vertex shader. */
struct r300_vertex_shader* vs;
/* Viewport state. */
struct r300_viewport_state* viewport_state;
+ /* ZTOP state. */
+ struct r300_ztop_state ztop_state;
+
+ /* Vertex buffers for Gallium. */
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ int vertex_buffer_count;
+ /* Vertex elements for Gallium. */
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ int vertex_element_count;
+
/* Bitmask of dirty state objects. */
uint32_t dirty_state;
/* Flag indicating whether or not the HW is dirty. */
@@ -312,11 +350,13 @@ void r300_init_surface_functions(struct r300_context* r300);
#define DBG_VP 0x0000004
#define DBG_CS 0x0000008
#define DBG_DRAW 0x0000010
+#define DBG_TEX 0x0000020
+#define DBG_FALL 0x0000040
/*@}*/
static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
{
- return (ctx->debug & flags) ? true : false;
+ return (ctx->debug & flags) ? TRUE : FALSE;
}
static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...)
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 883f0a02dc5..d142fee0502 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -26,7 +26,8 @@
#include "util/u_math.h"
#include "r300_reg.h"
-#include "r300_winsys.h"
+
+#include "radeon_winsys.h"
/* Yes, I know macros are ugly. However, they are much prettier than the code
* that they neatly hide away, and don't have the cost of function setup,so
@@ -34,8 +35,8 @@
#define MAX_CS_SIZE 64 * 1024 / 4
-#define VERY_VERBOSE_CS 0
-#define VERY_VERBOSE_REGISTERS 0
+#define VERY_VERBOSE_CS 1
+#define VERY_VERBOSE_REGISTERS 1
/* XXX stolen from radeon_drm.h */
#define RADEON_GEM_DOMAIN_CPU 0x1
@@ -50,11 +51,11 @@
#define CS_LOCALS(context) \
struct r300_context* const cs_context_copy = (context); \
- struct r300_winsys* cs_winsys = cs_context_copy->winsys; \
+ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \
int cs_count = 0;
#define CHECK_CS(size) \
- cs_winsys->check_cs(cs_winsys, (size))
+ assert(cs_winsys->check_cs(cs_winsys, (size)))
#define BEGIN_CS(size) do { \
CHECK_CS(size); \
@@ -114,6 +115,15 @@
cs_count -= 3; \
} while (0)
+#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
+ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \
+ "domains (%d, %d, %d)\n", \
+ bo, rd, wd, flags); \
+ assert(bo); \
+ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
+ cs_count -= 2; \
+} while (0)
+
#define END_CS do { \
if (VERY_VERBOSE_CS) { \
DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 85d69c07479..2a6ed54ac9b 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -37,6 +37,8 @@ static struct debug_option debug_options[] = {
{ "vp", DBG_VP, "Vertex program handling" },
{ "cs", DBG_CS, "Command submissions" },
{ "draw", DBG_DRAW, "Draw and emit" },
+ { "tex", DBG_TEX, "Textures" },
+ { "fall", DBG_FALL, "Fallbacks" },
{ "all", ~0, "Convenience option that enables all debug flags" },
@@ -47,7 +49,7 @@ static struct debug_option debug_options[] = {
void r300_init_debug(struct r300_context * ctx)
{
const char * options = debug_get_option("RADEON_DEBUG", 0);
- boolean printhint = false;
+ boolean printhint = FALSE;
size_t length;
struct debug_option * opt;
@@ -69,14 +71,14 @@ void r300_init_debug(struct r300_context * ctx)
if (!opt->name) {
debug_printf("Unknown debug option: %s\n", options);
- printhint = true;
+ printhint = TRUE;
}
options += length;
}
if (!ctx->debug)
- printhint = true;
+ printhint = TRUE;
}
if (printhint || ctx->debug & DBG_HELP) {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 77ce431cdc3..f8bfa714fef 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -22,20 +23,35 @@
/* r300_emit: Functions for emitting state. */
-#include "r300_emit.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
#include "r300_fs.h"
+#include "r300_screen.h"
#include "r300_state_derived.h"
+#include "r300_state_inlines.h"
+#include "r300_texture.h"
#include "r300_vs.h"
void r300_emit_blend_state(struct r300_context* r300,
struct r300_blend_state* blend)
{
CS_LOCALS(r300);
- BEGIN_CS(7);
- OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2);
- OUT_CS(blend->blend_control);
- OUT_CS(blend->alpha_blend_control);
+ BEGIN_CS(8);
+ OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
+ if (r300->framebuffer_state.nr_cbufs) {
+ OUT_CS(blend->blend_control);
+ OUT_CS(blend->alpha_blend_control);
+ OUT_CS(blend->color_channel_mask);
+ } else {
+ OUT_CS(0);
+ OUT_CS(0);
+ OUT_CS(0);
+ /* XXX also disable fastfill here once it's supported */
+ }
OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
END_CS;
@@ -96,19 +112,30 @@ void r300_emit_dsa_state(struct r300_context* r300,
struct r300_screen* r300screen = r300_screen(r300->context.screen);
CS_LOCALS(r300);
- BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8);
+ BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8);
OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
- /* XXX figure out the r300 counterpart for this */
- if (r300screen->caps->is_r500) {
- /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */
- }
+
+ /* not needed since we use the 8bit alpha ref */
+ /*if (r300screen->caps->is_r500) {
+ OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference);
+ }*/
+
OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
- OUT_CS(dsa->z_buffer_control);
- OUT_CS(dsa->z_stencil_control);
+
+ if (r300->framebuffer_state.zsbuf) {
+ OUT_CS(dsa->z_buffer_control);
+ OUT_CS(dsa->z_stencil_control);
+ } else {
+ OUT_CS(0);
+ OUT_CS(0);
+ }
+
OUT_CS(dsa->stencil_ref_mask);
- OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top);
+ OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top);
+
+ /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
if (r300screen->caps->is_r500) {
- /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */
+ OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
}
END_CS;
}
@@ -118,7 +145,9 @@ static const float * get_shader_constant(
struct rc_constant * constant,
struct r300_constant_buffer * externals)
{
- static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 };
+ static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
+ struct pipe_texture *tex;
+
switch(constant->Type) {
case RC_CONSTANT_EXTERNAL:
return externals->constants[constant->u.External];
@@ -126,11 +155,60 @@ static const float * get_shader_constant(
case RC_CONSTANT_IMMEDIATE:
return constant->u.Immediate;
+ case RC_CONSTANT_STATE:
+ switch (constant->u.State[0]) {
+ /* Factor for converting rectangle coords to
+ * normalized coords. Should only show up on non-r500. */
+ case RC_STATE_R300_TEXRECT_FACTOR:
+ tex = &r300->textures[constant->u.State[1]]->tex;
+ vec[0] = 1.0 / tex->width0;
+ vec[1] = 1.0 / tex->height0;
+ break;
+
+ /* Texture compare-fail value. */
+ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient,
+ * this is always (0,0,0,0). */
+ case RC_STATE_SHADOW_AMBIENT:
+ vec[3] = 0;
+ break;
+
+ case RC_STATE_R300_VIEWPORT_SCALE:
+ if (r300->rs_state->enable_vte) {
+ vec[0] = r300->viewport_state->xscale;
+ vec[1] = r300->viewport_state->yscale;
+ vec[2] = r300->viewport_state->zscale;
+ } else {
+ vec[0] = 1;
+ vec[1] = 1;
+ vec[2] = 1;
+ }
+ break;
+
+ case RC_STATE_R300_VIEWPORT_OFFSET:
+ if (r300->rs_state->enable_vte) {
+ vec[0] = r300->viewport_state->xoffset;
+ vec[1] = r300->viewport_state->yoffset;
+ vec[2] = r300->viewport_state->zoffset;
+ } else {
+ /* Zeros. */
+ }
+ break;
+
+ default:
+ debug_printf("r300: Implementation error: "
+ "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
+ }
+ break;
+
default:
- debug_printf("r300: Implementation error: Unhandled constant type %i\n",
- constant->Type);
- return zero;
+ debug_printf("r300: Implementation error: "
+ "Unhandled constant type %d\n", constant->Type);
}
+
+ /* This should either be (0, 0, 0, 1), which should be a relatively safe
+ * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE
+ * state factors. */
+ return vec;
}
/* Convert a normal single-precision float into the 7.16 format
@@ -168,18 +246,15 @@ static uint32_t pack_float24(float f)
}
void r300_emit_fragment_program_code(struct r300_context* r300,
- struct rX00_fragment_program_code* generic_code,
- struct r300_constant_buffer* externals)
+ struct rX00_fragment_program_code* generic_code)
{
struct r300_fragment_program_code * code = &generic_code->code.r300;
- struct rc_constant_list * constants = &generic_code->constants;
int i;
CS_LOCALS(r300);
BEGIN_CS(15 +
code->alu.length * 4 +
- (code->tex.length ? (1 + code->tex.length) : 0) +
- (constants->Count ? (1 + constants->Count * 4) : 0));
+ (code->tex.length ? (1 + code->tex.length) : 0));
OUT_CS_REG(R300_US_CONFIG, code->config);
OUT_CS_REG(R300_US_PIXSIZE, code->pixsize);
@@ -211,33 +286,58 @@ void r300_emit_fragment_program_code(struct r300_context* r300,
OUT_CS(code->tex.inst[i]);
}
- if (constants->Count) {
- OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4);
- for(i = 0; i < constants->Count; ++i) {
- const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
- OUT_CS(pack_float24(data[0]));
- OUT_CS(pack_float24(data[1]));
- OUT_CS(pack_float24(data[2]));
- OUT_CS(pack_float24(data[3]));
- }
+ END_CS;
+}
+
+void r300_emit_fs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants)
+{
+ int i;
+ CS_LOCALS(r300);
+
+ if (constants->Count == 0)
+ return;
+
+ BEGIN_CS(constants->Count * 4 + 1);
+ OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4);
+ for(i = 0; i < constants->Count; ++i) {
+ const float * data = get_shader_constant(r300,
+ &constants->Constants[i],
+ &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+ OUT_CS(pack_float24(data[0]));
+ OUT_CS(pack_float24(data[1]));
+ OUT_CS(pack_float24(data[2]));
+ OUT_CS(pack_float24(data[3]));
}
+ END_CS;
+}
+static void r300_emit_fragment_depth_config(struct r300_context* r300,
+ struct r300_fragment_shader* fs)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(4);
+ if (r300_fragment_shader_writes_depth(fs)) {
+ OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER);
+ OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US);
+ } else {
+ OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN);
+ OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US);
+ }
END_CS;
}
void r500_emit_fragment_program_code(struct r300_context* r300,
- struct rX00_fragment_program_code* generic_code,
- struct r300_constant_buffer* externals)
+ struct rX00_fragment_program_code* generic_code)
{
struct r500_fragment_program_code * code = &generic_code->code.r500;
- struct rc_constant_list * constants = &generic_code->constants;
int i;
CS_LOCALS(r300);
BEGIN_CS(13 +
- ((code->inst_end + 1) * 6) +
- (constants->Count ? (3 + (constants->Count * 4)) : 0));
- OUT_CS_REG(R500_US_CONFIG, 0);
+ ((code->inst_end + 1) * 6));
+ OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx);
OUT_CS_REG(R500_US_CODE_RANGE,
R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
@@ -256,18 +356,30 @@ void r500_emit_fragment_program_code(struct r300_context* r300,
OUT_CS(code->inst[i].inst5);
}
- if (constants->Count) {
- OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
- OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
- for (i = 0; i < constants->Count; i++) {
- const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
- }
- }
+ END_CS;
+}
+
+void r500_emit_fs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants)
+{
+ int i;
+ CS_LOCALS(r300);
+
+ if (constants->Count == 0)
+ return;
+ BEGIN_CS(constants->Count * 4 + 3);
+ OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
+ for (i = 0; i < constants->Count; i++) {
+ const float * data = get_shader_constant(r300,
+ &constants->Constants[i],
+ &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+ OUT_CS_32F(data[0]);
+ OUT_CS_32F(data[1]);
+ OUT_CS_32F(data[2]);
+ OUT_CS_32F(data[3]);
+ }
END_CS;
}
@@ -275,76 +387,98 @@ void r300_emit_fb_state(struct r300_context* r300,
struct pipe_framebuffer_state* fb)
{
struct r300_texture* tex;
- unsigned pixpitch;
+ struct pipe_surface* surf;
int i;
CS_LOCALS(r300);
- BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4);
+ /* Shouldn't fail unless there is a bug in the state tracker. */
+ assert(fb->nr_cbufs <= 4);
+
+ BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) +
+ (fb->zsbuf ? 10 : 0) + 6);
+
+ /* Flush and free renderbuffer caches. */
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+ /* Set the number of colorbuffers. */
+ OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs));
+
+ /* Set up colorbuffers. */
for (i = 0; i < fb->nr_cbufs; i++) {
- tex = (struct r300_texture*)fb->cbufs[i]->texture;
+ surf = fb->cbufs[i];
+ tex = (struct r300_texture*)surf->texture;
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
- OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
- OUT_CS_RELOC(tex->buffer, pixpitch |
+ OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
r300_translate_colorformat(tex->tex.format), 0,
RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
- r300_translate_out_fmt(fb->cbufs[i]->format));
+ r300_translate_out_fmt(surf->format));
+ }
+
+ /* Disable unused colorbuffers. */
+ for (; i < 4; i++) {
+ OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED);
}
+ /* Set up a zbuffer. */
if (fb->zsbuf) {
- tex = (struct r300_texture*)fb->zsbuf->texture;
+ surf = fb->zsbuf;
+ tex = (struct r300_texture*)surf->texture;
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
- OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
- OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0,
+ RADEON_GEM_DOMAIN_VRAM, 0);
}
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
- R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
END_CS;
}
-void r300_emit_query_begin(struct r300_context* r300,
- struct r300_query* query)
+static void r300_emit_query_start(struct r300_context *r300)
{
+ struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
+ struct r300_query *query = r300->query_current;
CS_LOCALS(r300);
- /* XXX This will almost certainly not return good results
- * for overlapping queries. */
- BEGIN_CS(2);
+ if (!query)
+ return;
+
+ BEGIN_CS(4);
+ if (caps->family == CHIP_FAMILY_RV530) {
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ } else {
+ OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ }
OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
END_CS;
+ query->begin_emitted = TRUE;
}
-void r300_emit_query_end(struct r300_context* r300,
- struct r300_query* query)
+
+static void r300_emit_query_finish(struct r300_context *r300,
+ struct r300_query *query)
{
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
- 0, RADEON_GEM_DOMAIN_GTT)) {
- debug_printf("r300: There wasn't room for the OQ buffer!?"
- " Oh noes!\n");
- }
-
assert(caps->num_frag_pipes);
+
BEGIN_CS(6 * caps->num_frag_pipes + 2);
/* I'm not so sure I like this switch, but it's hard to be elegant
* when there's so many special cases...
@@ -381,7 +515,7 @@ void r300_emit_query_end(struct r300_context* r300,
OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
0, RADEON_GEM_DOMAIN_GTT, 0);
- break;
+ break;
default:
debug_printf("r300: Implementation error: Chipset reports %d"
" pixel pipes!\n", caps->num_frag_pipes);
@@ -391,14 +525,62 @@ void r300_emit_query_end(struct r300_context* r300,
/* And, finally, reset it to normal... */
OUT_CS_REG(R300_SU_REG_DEST, 0xF);
END_CS;
+}
+
+static void rv530_emit_query_single(struct r300_context *r300,
+ struct r300_query *query)
+{
+ CS_LOCALS(r300);
+ BEGIN_CS(8);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_CS;
+}
+
+static void rv530_emit_query_double(struct r300_context *r300,
+ struct r300_query *query)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(14);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_CS;
+}
+
+void r300_emit_query_end(struct r300_context* r300)
+{
+ struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
+ struct r300_query *query = r300->query_current;
+
+ if (!query)
+ return;
+
+ if (query->begin_emitted == FALSE)
+ return;
+
+ if (caps->family == CHIP_FAMILY_RV530) {
+ if (caps->num_z_pipes == 2)
+ rv530_emit_query_double(r300, query);
+ else
+ rv530_emit_query_single(r300, query);
+ } else
+ r300_emit_query_finish(r300, query);
}
void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
{
CS_LOCALS(r300);
- BEGIN_CS(20);
+ BEGIN_CS(22);
OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2);
@@ -414,6 +596,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control);
+ OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode);
END_CS;
}
@@ -424,6 +607,8 @@ void r300_emit_rs_block_state(struct r300_context* r300,
struct r300_screen* r300screen = r300_screen(r300->context.screen);
CS_LOCALS(r300);
+ DBG(r300, DBG_DRAW, "r300: RS emit:\n");
+
BEGIN_CS(21);
if (r300screen->caps->is_r500) {
OUT_CS_REG_SEQ(R500_RS_IP_0, 8);
@@ -432,7 +617,7 @@ void r300_emit_rs_block_state(struct r300_context* r300,
}
for (i = 0; i < 8; i++) {
OUT_CS(rs->ip[i]);
- /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */
+ DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]);
}
OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
@@ -446,40 +631,67 @@ void r300_emit_rs_block_state(struct r300_context* r300,
}
for (i = 0; i < 8; i++) {
OUT_CS(rs->inst[i]);
- /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */
+ DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]);
}
- /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count,
- * rs->inst_count); */
+ DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n",
+ rs->count, rs->inst_count);
END_CS;
}
-void r300_emit_scissor_state(struct r300_context* r300,
- struct r300_scissor_state* scissor)
+static void r300_emit_scissor_regs(struct r300_context* r300,
+ struct r300_scissor_regs* scissor)
{
CS_LOCALS(r300);
BEGIN_CS(3);
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
- OUT_CS(scissor->scissor_top_left);
- OUT_CS(scissor->scissor_bottom_right);
+ OUT_CS(scissor->top_left);
+ OUT_CS(scissor->bottom_right);
END_CS;
}
+void r300_emit_scissor_state(struct r300_context* r300,
+ struct r300_scissor_state* scissor)
+{
+ if (r300->rs_state->rs.scissor) {
+ r300_emit_scissor_regs(r300, &scissor->scissor);
+ } else {
+ r300_emit_scissor_regs(r300, &scissor->framebuffer);
+ }
+}
+
void r300_emit_texture(struct r300_context* r300,
struct r300_sampler_state* sampler,
struct r300_texture* tex,
unsigned offset)
{
+ uint32_t filter0 = sampler->filter0;
+ uint32_t format0 = tex->state.format0;
+ unsigned min_level, max_level;
CS_LOCALS(r300);
+ /* to emulate 1D textures through 2D ones correctly */
+ if (tex->tex.target == PIPE_TEXTURE_1D) {
+ filter0 &= ~R300_TX_WRAP_T_MASK;
+ filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
+ }
+
+ /* determine min/max levels */
+ /* the MAX_MIP level is the largest (finest) one */
+ max_level = MIN2(sampler->max_lod, tex->tex.last_level);
+ min_level = MIN2(sampler->min_lod, max_level);
+ format0 |= R300_TX_NUM_LEVELS(max_level);
+ filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+
BEGIN_CS(16);
- OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0);
+ OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 |
+ (offset << 28));
OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1);
OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color);
- OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0);
+ OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0);
OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1);
OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2);
OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1);
@@ -488,13 +700,76 @@ void r300_emit_texture(struct r300_context* r300,
END_CS;
}
-void r300_emit_vertex_buffer(struct r300_context* r300)
+static boolean r300_validate_aos(struct r300_context *r300)
+{
+ struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->vertex_element;
+ int i;
+
+ /* Check if formats and strides are aligned to the size of DWORD. */
+ for (i = 0; i < r300->vertex_element_count; i++) {
+ if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
+ util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+void r300_emit_aos(struct r300_context* r300, unsigned offset)
+{
+ struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->vertex_element;
+ int i;
+ unsigned size1, size2, aos_count = r300->vertex_element_count;
+ unsigned packet_size = (aos_count * 3 + 1) / 2;
+ CS_LOCALS(r300);
+
+ /* XXX Move this checking to a more approriate place. */
+ if (!r300_validate_aos(r300)) {
+ /* XXX We should fallback using Draw. */
+ assert(0);
+ }
+
+ BEGIN_CS(2 + packet_size + aos_count * 2);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+ OUT_CS(aos_count);
+
+ for (i = 0; i < aos_count - 1; i += 2) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+ size1 = util_format_get_blocksize(velem[i].src_format);
+ size2 = util_format_get_blocksize(velem[i+1].src_format);
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+ R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
+ }
+
+ if (aos_count & 1) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ size1 = util_format_get_blocksize(velem[i].src_format);
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ }
+
+ for (i = 0; i < aos_count; i++) {
+ OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ }
+ END_CS;
+}
+
+#if 0
+void r300_emit_draw_packet(struct r300_context* r300)
{
CS_LOCALS(r300);
DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
"vertex size %d\n", r300->vbo,
- r300->vertex_info.vinfo.size);
+ r300->vertex_info->vinfo.size);
/* Set the pointer to our vertex buffer. The emitted values are this:
* PACKET3 [3D_LOAD_VBPNTR]
* COUNT [1]
@@ -505,54 +780,66 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
BEGIN_CS(7);
OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
OUT_CS(1);
- OUT_CS(r300->vertex_info.vinfo.size |
- (r300->vertex_info.vinfo.size << 8));
+ OUT_CS(r300->vertex_info->vinfo.size |
+ (r300->vertex_info->vinfo.size << 8));
OUT_CS(r300->vbo_offset);
OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
END_CS;
}
+#endif
void r300_emit_vertex_format_state(struct r300_context* r300)
{
int i;
CS_LOCALS(r300);
+ DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n");
+
BEGIN_CS(26);
- OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info.vinfo.size);
+ OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size);
OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
- OUT_CS(r300->vertex_info.vinfo.hwfmt[0]);
- OUT_CS(r300->vertex_info.vinfo.hwfmt[1]);
+ OUT_CS(r300->vertex_info->vinfo.hwfmt[0]);
+ OUT_CS(r300->vertex_info->vinfo.hwfmt[1]);
OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
- OUT_CS(r300->vertex_info.vinfo.hwfmt[2]);
- OUT_CS(r300->vertex_info.vinfo.hwfmt[3]);
- /* for (i = 0; i < 4; i++) {
- * debug_printf("hwfmt%d: 0x%08x\n", i,
- * r300->vertex_info.vinfo.hwfmt[i]);
- * } */
+ OUT_CS(r300->vertex_info->vinfo.hwfmt[2]);
+ OUT_CS(r300->vertex_info->vinfo.hwfmt[3]);
+ for (i = 0; i < 4; i++) {
+ DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i,
+ r300->vertex_info->vinfo.hwfmt[i]);
+ }
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
for (i = 0; i < 8; i++) {
- OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]);
- /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i,
- * r300->vertex_info.vap_prog_stream_cntl[i]); */
+ OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]);
+ DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i,
+ r300->vertex_info->vap_prog_stream_cntl[i]);
}
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
for (i = 0; i < 8; i++) {
- OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]);
- /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i,
- * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */
+ OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]);
+ DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
+ r300->vertex_info->vap_prog_stream_cntl_ext[i]);
}
END_CS;
}
+
void r300_emit_vertex_program_code(struct r300_context* r300,
- struct r300_vertex_program_code* code,
- struct r300_constant_buffer* constants)
+ struct r300_vertex_program_code* code)
{
int i;
struct r300_screen* r300screen = r300_screen(r300->context.screen);
unsigned instruction_count = code->length / 4;
+
+ int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72;
+ int input_count = MAX2(util_bitcount(code->InputsRead), 1);
+ int output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
+ int temp_count = MAX2(code->num_temporaries, 1);
+ int pvs_num_slots = MIN3(vtx_mem_size / input_count,
+ vtx_mem_size / output_count, 10);
+ int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
+
CS_LOCALS(r300);
if (!r300screen->caps->has_tcl) {
@@ -561,17 +848,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
return;
}
- if (code->constants.Count) {
- BEGIN_CS(14 + code->length + (code->constants.Count * 4));
- } else {
- BEGIN_CS(11 + code->length);
- }
-
+ BEGIN_CS(9 + code->length);
/* R300_VAP_PVS_CODE_CNTL_0
* R300_VAP_PVS_CONST_CNTL
* R300_VAP_PVS_CODE_CNTL_1
- * See the r5xx docs for instructions on how to use these.
- * XXX these could be optimized to select better values... */
+ * See the r5xx docs for instructions on how to use these. */
OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
OUT_CS(R300_PVS_FIRST_INST(0) |
R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
@@ -584,32 +865,51 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
for (i = 0; i < code->length; i++)
OUT_CS(code->body.d[i]);
- if (code->constants.Count) {
- OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
- (r300screen->caps->is_r500 ?
- R500_PVS_CONST_START : R300_PVS_CONST_START));
- OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4);
- for (i = 0; i < code->constants.Count; i++) {
- const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants);
- OUT_CS_32F(data[0]);
- OUT_CS_32F(data[1]);
- OUT_CS_32F(data[2]);
- OUT_CS_32F(data[3]);
- }
- }
-
- OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
- R300_PVS_NUM_CNTLRS(5) |
+ OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
+ R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) |
- R300_PVS_VF_MAX_VTX_NUM(12));
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+ R300_PVS_VF_MAX_VTX_NUM(12) |
+ (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
END_CS;
}
void r300_emit_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs)
{
- r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]);
+ r300_emit_vertex_program_code(r300, &vs->code);
+}
+
+void r300_emit_vs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants)
+{
+ int i;
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ CS_LOCALS(r300);
+
+ if (!r300screen->caps->has_tcl) {
+ debug_printf("r300: Implementation error: emit_vertex_shader called,"
+ " but has_tcl is FALSE!\n");
+ return;
+ }
+
+ if (constants->Count == 0)
+ return;
+
+ BEGIN_CS(constants->Count * 4 + 3);
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300screen->caps->is_r500 ?
+ R500_PVS_CONST_START : R300_PVS_CONST_START));
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->Count * 4);
+ for (i = 0; i < constants->Count; i++) {
+ const float * data = get_shader_constant(r300,
+ &constants->Constants[i],
+ &r300->shader_constants[PIPE_SHADER_VERTEX]);
+ OUT_CS_32F(data[0]);
+ OUT_CS_32F(data[1]);
+ OUT_CS_32F(data[2]);
+ OUT_CS_32F(data[3]);
+ }
+ END_CS;
}
void r300_emit_viewport_state(struct r300_context* r300,
@@ -634,13 +934,42 @@ void r300_emit_viewport_state(struct r300_context* r300,
END_CS;
}
+void r300_emit_texture_count(struct r300_context* r300)
+{
+ uint32_t tx_enable = 0;
+ int i;
+ CS_LOCALS(r300);
+
+ /* Notice that texture_count and sampler_count are just sizes
+ * of the respective arrays. We still have to check for the individual
+ * elements. */
+ for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
+ if (r300->textures[i]) {
+ tx_enable |= 1 << i;
+ }
+ }
+
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_TX_ENABLE, tx_enable);
+ END_CS;
+
+}
+
void r300_flush_textures(struct r300_context* r300)
{
CS_LOCALS(r300);
- BEGIN_CS(4);
+ BEGIN_CS(2);
OUT_CS_REG(R300_TX_INVALTAGS, 0);
- OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+ END_CS;
+}
+
+static void r300_flush_pvs(struct r300_context* r300)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
END_CS;
}
@@ -656,9 +985,17 @@ void r300_emit_dirty_state(struct r300_context* r300)
return;
}
- r300_update_derived_state(r300);
+ /* Check size of CS. */
+ /* Make sure we have at least 8*1024 spare dwords. */
+ /* XXX It would be nice to know the number of dwords we really need to
+ * XXX emit. */
+ if (!r300->winsys->check_cs(r300->winsys, 8*1024)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ }
+
+ /* Clean out BOs. */
+ r300->winsys->reset_bos(r300->winsys);
- /* XXX check size */
validate:
/* Color buffers... */
for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) {
@@ -683,7 +1020,8 @@ validate:
/* ...textures... */
for (i = 0; i < r300->texture_count; i++) {
tex = r300->textures[i];
- assert(tex && tex->buffer && "texture is marked, but NULL!");
+ if (!tex)
+ continue;
if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
r300->context.flush(&r300->context, 0, NULL);
@@ -704,7 +1042,7 @@ validate:
goto validate;
}
} else {
- debug_printf("No VBO while emitting dirty state!\n");
+ /* debug_printf("No VBO while emitting dirty state!\n"); */
}
if (!r300->winsys->validate(r300->winsys)) {
r300->context.flush(&r300->context, 0, NULL);
@@ -717,6 +1055,11 @@ validate:
goto validate;
}
+ if (r300->dirty_state & R300_NEW_QUERY) {
+ r300_emit_query_start(r300);
+ r300->dirty_state &= ~R300_NEW_QUERY;
+ }
+
if (r300->dirty_state & R300_NEW_BLEND) {
r300_emit_blend_state(r300, r300->blend_state);
r300->dirty_state &= ~R300_NEW_BLEND;
@@ -738,14 +1081,26 @@ validate:
}
if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+ r300_emit_fragment_depth_config(r300, r300->fs);
if (r300screen->caps->is_r500) {
- r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+ r500_emit_fragment_program_code(r300, &r300->fs->shader->code);
} else {
- r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+ r300_emit_fragment_program_code(r300, &r300->fs->shader->code);
}
r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
}
+ if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) {
+ if (r300screen->caps->is_r500) {
+ r500_emit_fs_constant_buffer(r300,
+ &r300->fs->shader->code.constants);
+ } else {
+ r300_emit_fs_constant_buffer(r300,
+ &r300->fs->shader->code.constants);
+ }
+ r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
+
if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) {
r300_emit_fb_state(r300, &r300->framebuffer_state);
r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS;
@@ -769,13 +1124,16 @@ validate:
/* Samplers and textures are tracked separately but emitted together. */
if (r300->dirty_state &
(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) {
+ r300_emit_texture_count(r300);
+
for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
- if (r300->dirty_state &
- ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
- r300_emit_texture(r300,
- r300->sampler_states[i],
- r300->textures[i],
- i);
+ if (r300->dirty_state &
+ ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
+ if (r300->textures[i])
+ r300_emit_texture(r300,
+ r300->sampler_states[i],
+ r300->textures[i],
+ i);
r300->dirty_state &=
~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i));
dirty_tex++;
@@ -798,17 +1156,26 @@ validate:
r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT;
}
+ if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) {
+ r300_flush_pvs(r300);
+ }
+
if (r300->dirty_state & R300_NEW_VERTEX_SHADER) {
r300_emit_vertex_shader(r300, r300->vs);
r300->dirty_state &= ~R300_NEW_VERTEX_SHADER;
}
+ if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) {
+ r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants);
+ r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS;
+ }
+
/* XXX
assert(r300->dirty_state == 0);
*/
/* Finally, emit the VBO. */
- r300_emit_vertex_buffer(r300);
+ /* r300_emit_vertex_buffer(r300); */
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index c4002b8e5d0..3797d3d332a 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -23,16 +23,14 @@
#ifndef R300_EMIT_H
#define R300_EMIT_H
-#include "util/u_math.h"
-
#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_screen.h"
-#include "r300_state_inlines.h"
+#include "radeon_code.h"
struct rX00_fragment_program_code;
struct r300_vertex_program_code;
+void r300_emit_aos(struct r300_context* r300, unsigned offset);
+
void r300_emit_blend_state(struct r300_context* r300,
struct r300_blend_state* blend);
@@ -46,20 +44,24 @@ void r300_emit_dsa_state(struct r300_context* r300,
struct r300_dsa_state* dsa);
void r300_emit_fragment_program_code(struct r300_context* r300,
- struct rX00_fragment_program_code* generic_code,
- struct r300_constant_buffer* externals);
+ struct rX00_fragment_program_code* generic_code);
+
+void r300_emit_fs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants);
void r500_emit_fragment_program_code(struct r300_context* r300,
- struct rX00_fragment_program_code* generic_code,
- struct r300_constant_buffer* externals);
+ struct rX00_fragment_program_code* generic_code);
+
+void r500_emit_fs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants);
void r300_emit_fb_state(struct r300_context* r300,
struct pipe_framebuffer_state* fb);
void r300_emit_query_begin(struct r300_context* r300,
struct r300_query* query);
-void r300_emit_query_end(struct r300_context* r300,
- struct r300_query* query);
+
+void r300_emit_query_end(struct r300_context* r300);
void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
@@ -79,8 +81,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300);
void r300_emit_vertex_format_state(struct r300_context* r300);
void r300_emit_vertex_program_code(struct r300_context* r300,
- struct r300_vertex_program_code* code,
- struct r300_constant_buffer* constants);
+ struct r300_vertex_program_code* code);
+
+void r300_emit_vs_constant_buffer(struct r300_context* r300,
+ struct rc_constant_list* constants);
void r300_emit_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
@@ -88,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300,
void r300_emit_viewport_state(struct r300_context* r300,
struct r300_viewport_state* viewport);
+void r300_emit_texture_count(struct r300_context* r300);
+
void r300_flush_textures(struct r300_context* r300);
/* Emit all dirty state. */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 0dff1c6f4fb..14a08241fc4 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -20,29 +20,48 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+#include "util/u_simple_list.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
#include "r300_flush.h"
+#include "r300_state_invariant.h"
static void r300_flush(struct pipe_context* pipe,
unsigned flags,
struct pipe_fence_handle** fence)
{
- struct r300_context* r300 = r300_context(pipe);
- CS_LOCALS(r300);
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_query *query;
+ CS_LOCALS(r300);
/* We probably need to flush Draw, but we may have been called from
- * within Draw. This feels kludgy, but it might be the best thing. */
- if (!r300->draw->flushing) {
+ * within Draw. This feels kludgy, but it might be the best thing.
+ *
+ * Of course, the best thing is to kill Draw with fire. :3 */
+ if (r300->draw && !r300->draw->flushing) {
draw_flush(r300->draw);
}
+ r300_emit_query_end(r300);
+
if (r300->dirty_hw) {
FLUSH_CS;
r300_emit_invariant_state(r300);
r300->dirty_state = R300_NEW_KITCHEN_SINK;
r300->dirty_hw = 0;
}
+ /* reset flushed query */
+ foreach(query, &r300->query_list) {
+ query->flushed = TRUE;
+ }
}
+
void r300_init_flush_functions(struct r300_context* r300)
{
r300->context.flush = r300_flush;
diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h
index 9a83d89daab..0e9e6106bb7 100644
--- a/src/gallium/drivers/r300/r300_flush.h
+++ b/src/gallium/drivers/r300/r300_flush.h
@@ -23,13 +23,6 @@
#ifndef R300_FLUSH_H
#define R300_FLUSH_H
-#include "draw/draw_private.h"
-
-#include "pipe/p_context.h"
-
-#include "r300_context.h"
-#include "r300_cs.h"
-
void r300_init_flush_functions(struct r300_context* r300);
#endif /* R300_FLUSH_H */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 546ad545a53..60ea9c171d5 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -1,6 +1,7 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,12 +22,58 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_fs.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_dump.h"
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_fs.h"
#include "r300_tgsi_to_rc.h"
+#include "radeon_code.h"
#include "radeon_compiler.h"
+/* Convert info about FS input semantics to r300_shader_semantics. */
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+ struct r300_shader_semantics* fs_inputs)
+{
+ int i;
+ unsigned index;
+
+ r300_shader_semantics_reset(fs_inputs);
+
+ for (i = 0; i < info->num_inputs; i++) {
+ index = info->input_semantic_index[i];
+
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ assert(index <= ATTR_COLOR_COUNT);
+ fs_inputs->color[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ assert(index <= ATTR_GENERIC_COUNT);
+ fs_inputs->generic[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ assert(index == 0);
+ fs_inputs->fog = i;
+ break;
+
+ case TGSI_SEMANTIC_POSITION:
+ assert(index == 0);
+ fs_inputs->wpos = i;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
static void find_output_registers(struct r300_fragment_program_compiler * compiler,
struct r300_fragment_shader * fs)
{
@@ -54,61 +101,69 @@ static void allocate_hardware_inputs(
void (*allocate)(void * data, unsigned input, unsigned hwreg),
void * mydata)
{
- struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info;
- int total_colors = 0;
- int colors = 0;
- int total_generic = 0;
- int generic = 0;
- int i;
-
- for (i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- total_colors++;
- break;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- total_generic++;
- break;
+ struct r300_shader_semantics* inputs =
+ (struct r300_shader_semantics*)c->UserData;
+ int i, reg = 0;
+
+ /* Allocate input registers. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (inputs->color[i] != ATTR_UNUSED) {
+ allocate(mydata, inputs->color[i], reg++);
+ }
+ }
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (inputs->generic[i] != ATTR_UNUSED) {
+ allocate(mydata, inputs->generic[i], reg++);
}
}
+ if (inputs->fog != ATTR_UNUSED) {
+ allocate(mydata, inputs->fog, reg++);
+ }
+ if (inputs->wpos != ATTR_UNUSED) {
+ allocate(mydata, inputs->wpos, reg++);
+ }
+}
- for(i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- allocate(mydata, i, colors);
- colors++;
- break;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- allocate(mydata, i, total_colors + generic);
- generic++;
- break;
+static void get_compare_state(
+ struct r300_context* r300,
+ struct r300_fragment_program_external_state* state,
+ unsigned shadow_samplers)
+{
+ memset(state, 0, sizeof(*state));
+
+ for (int i = 0; i < r300->sampler_count; i++) {
+ struct r300_sampler_state* s = r300->sampler_states[i];
+
+ if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* XXX Gallium doesn't provide us with any information regarding
+ * this mode, so we are screwed. I'm setting 0 = LUMINANCE. */
+ state->unit[i].depth_texture_mode = 0;
+
+ /* Fortunately, no need to translate this. */
+ state->unit[i].texture_compare_func = s->state.compare_func;
}
}
}
-void r300_translate_fragment_shader(struct r300_context* r300,
- struct r300_fragment_shader* fs)
+static void r300_translate_fragment_shader(
+ struct r300_context* r300,
+ struct r300_fragment_shader_code* shader)
{
+ struct r300_fragment_shader* fs = r300->fs;
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
+ int wpos = fs->inputs.wpos;
+ /* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
rc_init(&compiler.Base);
compiler.Base.Debug = DBG_ON(r300, DBG_FP);
- compiler.code = &fs->code;
+ compiler.code = &shader->code;
+ compiler.state = shader->compare_state;
compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
- compiler.UserData = fs;
-
- /* TODO: Program compilation depends on texture compare modes,
- * which are sampler state. Therefore, programs need to be recompiled
- * depending on this state as in the classic Mesa driver.
- *
- * This is not yet handled correctly.
- */
+ compiler.UserData = &fs->inputs;
find_output_registers(&compiler, fs);
@@ -123,20 +178,76 @@ void r300_translate_fragment_shader(struct r300_context* r300,
r300_tgsi_to_rc(&ttr, fs->state.tokens);
+ fs->shadow_samplers = compiler.Base.Program.ShadowSamplers;
+
+ /**
+ * Transform the program to support WPOS.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary. */
+ if (wpos != ATTR_UNUSED) {
+ /* Moving the input to some other reg is not really necessary. */
+ rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
+ }
+
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
if (compiler.Base.Error) {
- /* Todo: Fallback to software rendering gracefully? */
- fprintf(stderr, "r300 FP: Compiler error: %s\n", compiler.Base.ErrorMsg);
-
- if (compiler.is_r500) {
- memcpy(compiler.code, &r5xx_passthrough_fragment_shader, sizeof(r5xx_passthrough_fragment_shader));
- } else {
- memcpy(compiler.code, &r3xx_passthrough_fragment_shader, sizeof(r3xx_passthrough_fragment_shader));
- }
+ /* XXX failover maybe? */
+ DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n",
+ compiler.Base.ErrorMsg);
+ assert(0);
}
/* And, finally... */
rc_destroy(&compiler.Base);
- fs->translated = TRUE;
+}
+
+boolean r300_pick_fragment_shader(struct r300_context* r300)
+{
+ struct r300_fragment_shader* fs = r300->fs;
+ struct r300_fragment_program_external_state state;
+ struct r300_fragment_shader_code* ptr;
+
+ if (!fs->first) {
+ /* Build the fragment shader for the first time. */
+ fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code);
+
+ /* BTW shadow samplers will be known after the first translation,
+ * therefore we set ~0, which means it should look at all sampler
+ * states. This choice doesn't have any impact on the correctness. */
+ get_compare_state(r300, &fs->shader->compare_state, ~0);
+ r300_translate_fragment_shader(r300, fs->shader);
+ return TRUE;
+
+ } else if (fs->shadow_samplers) {
+ get_compare_state(r300, &state, fs->shadow_samplers);
+
+ /* Check if the currently-bound shader has been compiled
+ * with the texture-compare state we need. */
+ if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) {
+ /* Search for the right shader. */
+ ptr = fs->first;
+ while (ptr) {
+ if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) {
+ fs->shader = ptr;
+ return TRUE;
+ }
+ ptr = ptr->next;
+ }
+
+ /* Not found, gotta compile a new one. */
+ ptr = CALLOC_STRUCT(r300_fragment_shader_code);
+ ptr->next = fs->first;
+ fs->first = fs->shader = ptr;
+
+ ptr->compare_state = state;
+ r300_translate_fragment_shader(r300, ptr);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
}
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 967e9f697e9..40ce874353c 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -1,6 +1,7 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,28 +25,47 @@
#ifndef R300_FS_H
#define R300_FS_H
-#include "tgsi/tgsi_dump.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+#include "radeon_code.h"
+#include "r300_shader_semantics.h"
-#include "r300_context.h"
-#include "r3xx_fs.h"
-#include "r5xx_fs.h"
+struct r300_fragment_shader_code {
+ struct r300_fragment_program_external_state compare_state;
+ struct rX00_fragment_program_code code;
-#include "radeon_code.h"
+ struct r300_fragment_shader_code* next;
+};
struct r300_fragment_shader {
/* Parent class */
struct pipe_shader_state state;
+
struct tgsi_shader_info info;
+ struct r300_shader_semantics inputs;
- /* Has this shader been translated yet? */
- boolean translated;
+ /* Bits 0-15: TRUE if it's a shadow sampler, FALSE otherwise. */
+ unsigned shadow_samplers;
- /* Compiled code */
- struct rX00_fragment_program_code code;
+ /* Currently-bound fragment shader. */
+ struct r300_fragment_shader_code* shader;
+
+ /* List of the same shaders compiled with different texture-compare
+ * states. */
+ struct r300_fragment_shader_code* first;
};
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+ struct r300_shader_semantics* fs_inputs);
+
+/* Return TRUE if the shader was switched and should be re-emitted. */
+boolean r300_pick_fragment_shader(struct r300_context* r300);
-void r300_translate_fragment_shader(struct r300_context* r300,
- struct r300_fragment_shader* fs);
+static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+{
+ if (!fs)
+ return FALSE;
+ return (fs->shader->code.writes_depth) ? TRUE : FALSE;
+}
#endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 2880d34877f..ca00b043c51 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -20,17 +20,23 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_query.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_cs.h"
#include "r300_emit.h"
+#include "r300_query.h"
+#include "r300_reg.h"
-static struct pipe_query* r300_create_query(struct pipe_context* pipe,
+static struct pipe_query *r300_create_query(struct pipe_context *pipe,
unsigned query_type)
{
- struct r300_context* r300 = r300_context(pipe);
- struct r300_screen* r300screen = r300_screen(r300->context.screen);
- unsigned query_size = r300screen->caps->num_frag_pipes * 4;
- struct r300_query* q, * qptr;
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_screen *r300screen = r300_screen(r300->context.screen);
+ unsigned query_size;
+ struct r300_query *q, *qptr;
q = CALLOC_STRUCT(r300_query);
@@ -39,13 +45,16 @@ static struct pipe_query* r300_create_query(struct pipe_context* pipe,
q->active = FALSE;
- if (!r300->query_list) {
- r300->query_list = q;
- } else if (!is_empty_list(r300->query_list)) {
- qptr = last_elem(r300->query_list);
+ if (r300screen->caps->family == CHIP_FAMILY_RV530)
+ query_size = r300screen->caps->num_z_pipes * sizeof(uint32_t);
+ else
+ query_size = r300screen->caps->num_frag_pipes * sizeof(uint32_t);
+
+ if (!is_empty_list(&r300->query_list)) {
+ qptr = last_elem(&r300->query_list);
q->offset = qptr->offset + query_size;
- insert_at_tail(r300->query_list, q);
}
+ insert_at_tail(&r300->query_list, q);
/* XXX */
if (q->offset >= 4096) {
@@ -71,24 +80,26 @@ static void r300_begin_query(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_query* q = (struct r300_query*)query;
+ assert(r300->query_current == NULL);
+
map = pipe->screen->buffer_map(pipe->screen, r300->oqbo,
PIPE_BUFFER_USAGE_CPU_WRITE);
map += q->offset / 4;
- *map = ~0;
+ *map = ~0U;
pipe->screen->buffer_unmap(pipe->screen, r300->oqbo);
- r300_emit_dirty_state(r300);
- r300_emit_query_begin(r300, q);
+ q->flushed = FALSE;
+ r300->query_current = q;
+ r300->dirty_state |= R300_NEW_QUERY;
}
static void r300_end_query(struct pipe_context* pipe,
- struct pipe_query* query)
+ struct pipe_query* query)
{
struct r300_context* r300 = r300_context(pipe);
- struct r300_query* q = (struct r300_query*)query;
- r300_emit_dirty_state(r300);
- r300_emit_query_end(r300, q);
+ r300_emit_query_end(r300);
+ r300->query_current = NULL;
}
static boolean r300_get_query_result(struct pipe_context* pipe,
@@ -98,28 +109,36 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
struct r300_screen* r300screen = r300_screen(r300->context.screen);
- struct r300_query* q = (struct r300_query*)query;
+ struct r300_query *q = (struct r300_query*)query;
unsigned flags = PIPE_BUFFER_USAGE_CPU_READ;
uint32_t* map;
- uint32_t temp;
- unsigned i;
+ uint32_t temp = 0;
+ unsigned i, num_results;
- if (wait) {
+ if (q->flushed == FALSE)
pipe->flush(pipe, 0, NULL);
- } else {
+ if (!wait) {
flags |= PIPE_BUFFER_USAGE_DONTBLOCK;
}
map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, flags);
+ if (!map)
+ return FALSE;
map += q->offset / 4;
- for (i = 0; i < r300screen->caps->num_frag_pipes; i++) {
- if (*map == ~0) {
+
+ if (r300screen->caps->family == CHIP_FAMILY_RV530)
+ num_results = r300screen->caps->num_z_pipes;
+ else
+ num_results = r300screen->caps->num_frag_pipes;
+
+ for (i = 0; i < num_results; i++) {
+ if (*map == ~0U) {
/* Looks like our results aren't ready yet. */
if (wait) {
debug_printf("r300: Despite waiting, OQ results haven't"
" come in yet.\n");
}
- temp = ~0;
+ temp = ~0U;
break;
}
temp += *map;
@@ -127,7 +146,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
}
pipe->screen->buffer_unmap(pipe->screen, r300->oqbo);
- if (temp == ~0) {
+ if (temp == ~0U) {
/* Our results haven't been written yet... */
return FALSE;
}
diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h
index 4f50e8f8440..48876da3123 100644
--- a/src/gallium/drivers/r300/r300_query.h
+++ b/src/gallium/drivers/r300/r300_query.h
@@ -23,10 +23,6 @@
#ifndef R300_QUERY_H
#define R300_QUERY_H
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-
struct r300_context;
static INLINE struct r300_query* r300_query(struct pipe_query* q)
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 3abff5db622..034bfc15cf9 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -348,6 +348,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_WRITE_ENA_W 8
# define R300_SWIZZLE1_SHIFT 16
+# define R300_VAP_SWIZZLE_X001 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+# define R300_VAP_SWIZZLE_XY01 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+# define R300_VAP_SWIZZLE_XYZ1 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
# define R300_VAP_SWIZZLE_XYZW \
((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
@@ -640,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_GB_SUPER_TILE_B (1 << 15)
# define R300_GB_SUBPIXEL_1_12 (0 << 16)
# define R300_GB_SUBPIXEL_1_16 (1 << 16)
-# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
-# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
-# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
-# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
-# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
-# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
-# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
-# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
-# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
-# define GB_TILE_CONFIG_SUBPRECISION (0 << 22)
-# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
-# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
-# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
-# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
+# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
+# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
+# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
+# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22)
+# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
+# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
+# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
+# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */
#define R300_GB_FIFO_SIZE 0x4024
@@ -679,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
-#define GB_Z_PEQ_CONFIG 0x4028
-# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
-# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
+#define R300_GB_Z_PEQ_CONFIG 0x4028
+# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
+# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
/* Specifies various polygon specific selects (fog, depth, perspective). */
#define R300_GB_SELECT 0x401c
@@ -704,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Specifies the graphics pipeline configuration for antialiasing. */
#define R300_GB_AA_CONFIG 0x4020
-# define GB_AA_CONFIG_AA_DISABLE (0 << 0)
-# define GB_AA_CONFIG_AA_ENABLE (1 << 0)
-# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
-# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
-# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
-# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
+# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0)
+# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
/* Selects which of 4 pipes are active. */
-#define GB_PIPE_SELECT 0x402c
-# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
-# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
-# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
-# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
-# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
-# define GB_PIPE_SELECT_MAX_PIPE 12
-# define GB_PIPE_SELECT_BAD_PIPES 14
-# define GB_PIPE_SELECT_CONFIG_PIPES 18
+#define R300_GB_PIPE_SELECT 0x402c
+# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
+# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
+# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
+# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
+# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+# define R300_GB_PIPE_SELECT_MAX_PIPE 12
+# define R300_GB_PIPE_SELECT_BAD_PIPES 14
+# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18
/* Specifies the sizes of the various FIFO`s in the sc/rs. */
-#define GB_FIFO_SIZE1 0x4070
+#define R300_GB_FIFO_SIZE1 0x4070
/* High water mark for SC input fifo */
-# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
-# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
/* High water mark for SC input fifo (B) */
-# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
-# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
/* High water mark for RS colors' fifo */
-# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
-# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
/* High water mark for RS textures' fifo */
-# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
-# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
/* This table specifies the source location and format for up to 16 texture
* addresses (i[0]:i[15]) and four colors (c[0]:c[3])
@@ -841,10 +862,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_POINTSIZE_X_MASK 0xffff0000
# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
-/* Blue fill color */
+/* Red fill color */
#define R500_GA_FILL_R 0x4220
-/* Blue fill color */
+/* Green fill color */
#define R500_GA_FILL_G 0x4224
/* Blue fill color */
@@ -1172,6 +1193,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* SU Depth Offset value */
#define R300_SU_DEPTH_OFFSET 0x42c4
+#define R300_SU_REG_DEST 0x42c8
+# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
+# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
+# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
+# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
+# define R300_RASTER_PIPE_SELECT_ALL 0xf
+
/* BEGIN: Rasterization / Interpolators - many guesses */
@@ -1265,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R500_RS_INST_TEX_ID(x) ((x) << 0)
#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
#define R500_RS_INST_TEX_ADDR_SHIFT 5
-# define R500_RS_INST_TEX_ADDR(x) ((x) << 0)
+# define R500_RS_INST_TEX_ADDR(x) ((x) << 5)
#define R500_RS_INST_COL_ID_SHIFT 12
# define R500_RS_INST_COL_ID(x) ((x) << 12)
#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
@@ -1435,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
+# define R300_TX_MAX_MIP_LEVEL_SHIFT 17
+# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17)
# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
@@ -1443,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_MAX_ANISO_MASK (7 << 21)
# define R300_TX_WRAP_S(x) ((x) << 0)
# define R300_TX_WRAP_T(x) ((x) << 3)
+# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17)
#define R300_TX_FILTER1_0 0x4440
# define R300_CHROMA_KEY_MODE_DISABLE 0
@@ -1472,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
# define R300_TX_DEPTHMASK_SHIFT 22
# define R300_TX_DEPTHMASK_MASK (0xf << 22)
-# define R300_TX_MAX_MIP_LEVEL_SHIFT 26
-# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26)
# define R300_TX_SIZE_PROJECTED (1 << 30)
# define R300_TX_PITCH_EN (1 << 31)
# define R300_TX_WIDTH(x) ((x) << 0)
@@ -1856,6 +1885,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_RGB_ADDR0(x) ((x) << 0)
# define R300_RGB_ADDR1(x) ((x) << 6)
# define R300_RGB_ADDR2(x) ((x) << 12)
+# define R300_RGB_TARGET(x) ((x) << 29)
#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
# define R300_ALU_SRC0A_SHIFT 0
@@ -1873,9 +1903,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_DSTA_REG (1 << 23)
# define R300_ALU_DSTA_OUTPUT (1 << 24)
# define R300_ALU_DSTA_DEPTH (1 << 27)
-# define R300_ALPHA_ADDR0(x) ((x) << 0)
-# define R300_ALPHA_ADDR1(x) ((x) << 6)
-# define R300_ALPHA_ADDR2(x) ((x) << 12)
+# define R300_ALPHA_ADDR0(x) ((x) << 0)
+# define R300_ALPHA_ADDR1(x) ((x) << 6)
+# define R300_ALPHA_ADDR2(x) ((x) << 12)
+# define R300_ALPHA_TARGET(x) ((x) << 25)
#define R300_US_ALU_RGB_INST_0 0x48C0
# define R300_ALU_ARGC_SRC0C_XYZ 0
@@ -2095,6 +2126,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R500_FG_ALPHA_VALUE 0x4be0
# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+#define RV530_FG_ZBREG_DEST 0x4be8
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0)
/* gap */
/* Fragment program parameters in 7.16 floating point */
@@ -2110,6 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* Unpipelined. */
#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
@@ -2150,6 +2186,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+# define R500_SRC_ALPHA_0_NO_READ (1 << 30)
+# define R500_SRC_ALPHA_1_NO_READ (1 << 31)
/* the following are shared between CBLEND and ABLEND */
# define R300_FCN_MASK (3 << 12)
@@ -2384,6 +2422,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_Z_WRITE_ENABLE (1 << 2)
# define R300_Z_SIGNED_COMPARE (1 << 3)
# define R300_STENCIL_FRONT_BACK (1 << 4)
+# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5)
+# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
#define R300_ZB_ZSTENCILCNTL 0x4f04
/* functions */
@@ -2600,7 +2640,7 @@ enum {
VE_COND_MUX_GTE = 25,
VE_SET_GREATER_THAN = 26,
VE_SET_EQUAL = 27,
- VE_SET_NOT_EQUAL = 28,
+ VE_SET_NOT_EQUAL = 28
};
enum {
@@ -2634,20 +2674,20 @@ enum {
ME_PRED_SET_CLR = 25,
ME_PRED_SET_INV = 26,
ME_PRED_SET_POP = 27,
- ME_PRED_SET_RESTORE = 28,
+ ME_PRED_SET_RESTORE = 28
};
enum {
/* R3XX */
PVS_MACRO_OP_2CLK_MADD = 0,
- PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1
};
enum {
PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
- PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */
};
enum {
@@ -2656,7 +2696,7 @@ enum {
PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
- PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */
};
enum {
@@ -2665,7 +2705,7 @@ enum {
PVS_SRC_SELECT_Z = 2, /* Select Z Component */
PVS_SRC_SELECT_W = 3, /* Select W Component */
PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
- PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */
};
/* PVS Opcode & Destination Operand Description */
@@ -2704,7 +2744,7 @@ enum {
PVS_DST_ADDR_SEL_MASK = 0x3,
PVS_DST_ADDR_SEL_SHIFT = 29,
PVS_DST_ADDR_MODE_0_MASK = 0x1,
- PVS_DST_ADDR_MODE_0_SHIFT = 31,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31
};
/* PVS Source Operand Description */
@@ -2739,7 +2779,7 @@ enum {
PVS_SRC_ADDR_SEL_MASK = 0x3,
PVS_SRC_ADDR_SEL_SHIFT = 29,
PVS_SRC_ADDR_MODE_1_MASK = 0x0,
- PVS_SRC_ADDR_MODE_1_SHIFT = 32,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32
};
/*\}*/
@@ -3256,6 +3296,11 @@ enum {
*/
#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
+# define R300_VBPNTR_SIZE0(x) ((x) >> 2)
+# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8)
+# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16)
+# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24)
+
#define R300_PACKET3_INDX_BUFFER 0x00003300
# define R300_INDX_BUFFER_DST_SHIFT 0
# define R300_INDX_BUFFER_SKIP_SHIFT 16
@@ -3313,10 +3358,6 @@ enum {
#define R200_3D_DRAW_IMMD_2 0xC0003500
-/* XXX Oh look, stuff not brought over from docs yet */
-
-#define R300_SU_REG_DEST 0x42C8
-
#endif /* _R300_REG_H */
/* *INDENT-ON* */
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index ca44e0f6615..a4ac9ad9a7b 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -20,19 +20,389 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "draw/draw_pipe.h"
+/* r300_render: Vertex and index buffer primitive emission. Contains both
+ * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */
+
+#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
+
+#include "pipe/p_inlines.h"
+
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "r300_cs.h"
#include "r300_context.h"
#include "r300_emit.h"
#include "r300_reg.h"
+#include "r300_render.h"
#include "r300_state_derived.h"
/* r300_render: Vertex and index buffer primitive emission. */
#define R300_MAX_VBO_SIZE (1024 * 1024)
+uint32_t r300_translate_primitive(unsigned prim)
+{
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ return R300_VAP_VF_CNTL__PRIM_POINTS;
+ case PIPE_PRIM_LINES:
+ return R300_VAP_VF_CNTL__PRIM_LINES;
+ case PIPE_PRIM_LINE_LOOP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+ case PIPE_PRIM_LINE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLES:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+ case PIPE_PRIM_QUADS:
+ return R300_VAP_VF_CNTL__PRIM_QUADS;
+ case PIPE_PRIM_QUAD_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+ case PIPE_PRIM_POLYGON:
+ return R300_VAP_VF_CNTL__PRIM_POLYGON;
+ default:
+ return 0;
+ }
+}
+
+static boolean r300_nothing_to_draw(struct r300_context *r300)
+{
+ return r300->rs_state->rs.scissor &&
+ r300->scissor_state->scissor.empty_area;
+}
+
+static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
+ unsigned mode)
+{
+ uint32_t color_control = r300->rs_state->color_control;
+
+ /* By default (see r300_state.c:r300_create_rs_state) color_control is
+ * initialized to provoking the first vertex.
+ *
+ * Triangle fans must be reduced to the second vertex, not the first, in
+ * Gallium flatshade-first mode, as per the GL spec.
+ * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
+ *
+ * Quads never provoke correctly in flatshade-first mode. The first
+ * vertex is never considered as provoking, so only the second, third,
+ * and fourth vertices can be selected, and both "third" and "last" modes
+ * select the fourth vertex. This is probably due to D3D lacking quads.
+ *
+ * Similarly, polygons reduce to the first, not the last, vertex, when in
+ * "last" mode, and all other modes start from the second vertex.
+ *
+ * ~ C.
+ */
+
+ if (r300->rs_state->rs.flatshade_first) {
+ switch (mode) {
+ case PIPE_PRIM_TRIANGLE_FAN:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
+ break;
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_POLYGON:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ break;
+ default:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST;
+ break;
+ }
+ } else {
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ }
+
+ return color_control;
+}
+
+static void r300_emit_draw_arrays(struct r300_context *r300,
+ unsigned mode,
+ unsigned count)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(8);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
+ r300_translate_primitive(mode));
+ END_CS;
+}
+
+static void r300_emit_draw_elements(struct r300_context *r300,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ uint32_t count_dwords;
+ uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
+ CS_LOCALS(r300);
+
+ /* XXX most of these are stupid */
+ assert(indexSize == 4 || indexSize == 2);
+ assert((start * indexSize) % 4 == 0);
+ assert(offset_dwords == 0);
+
+ BEGIN_CS(14);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+ if (indexSize == 4) {
+ count_dwords = count + start;
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
+ r300_translate_primitive(mode));
+ } else {
+ count_dwords = (count + start + 1) / 2;
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ r300_translate_primitive(mode));
+ }
+
+ /* INDX_BUFFER is a truly special packet3.
+ * Unlike most other packet3, where the offset is after the count,
+ * the order is reversed, so the relocation ends up carrying the
+ * size of the indexbuf instead of the offset.
+ *
+ * XXX Fix offset
+ */
+ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
+ (0 << R300_INDX_BUFFER_SKIP_SHIFT));
+ OUT_CS(offset_dwords);
+ OUT_CS_RELOC(indexBuffer, count_dwords,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+
+ END_CS;
+}
+
+
+static boolean r300_setup_vertex_buffers(struct r300_context *r300)
+{
+ struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->vertex_element;
+
+validate:
+ for (int i = 0; i < r300->vertex_element_count; i++) {
+ if (!r300->winsys->add_buffer(r300->winsys,
+ vbuf[velem[i].vertex_buffer_index].buffer,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+
+ if (!r300->winsys->validate(r300->winsys)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ return r300->winsys->validate(r300->winsys);
+ }
+
+ return TRUE;
+}
+
+/* This is the fast-path drawing & emission for HW TCL. */
+void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ if (count > 65535) {
+ /* XXX: use aux/indices functions to split this into smaller
+ * primitives.
+ */
+ return;
+ }
+
+ if (r300_nothing_to_draw(r300)) {
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ if (!r300_setup_vertex_buffers(r300)) {
+ return;
+ }
+
+ if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
+ return;
+ }
+
+ if (!r300->winsys->validate(r300->winsys)) {
+ return;
+ }
+
+ r300_emit_dirty_state(r300);
+
+ r300_emit_aos(r300, 0);
+
+ r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
+ mode, start, count);
+}
+
+/* Simple helpers for context setup. Should probably be moved to util. */
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count)
+{
+ pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+ mode, start, count);
+}
+
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ if (count > 65535) {
+ /* XXX: driver needs to handle this -- use the functions in
+ * aux/indices to split this into several smaller primitives.
+ */
+ return;
+ }
+
+ if (r300_nothing_to_draw(r300)) {
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ if (!r300_setup_vertex_buffers(r300)) {
+ return;
+ }
+
+ r300_emit_dirty_state(r300);
+
+ r300_emit_aos(r300, start);
+
+ r300_emit_draw_arrays(r300, mode, count);
+}
+
+/****************************************************************************
+ * The rest of this file is for SW TCL rendering only. Please be polite and *
+ * keep these functions separated so that they are easier to locate. ~C. *
+ ***************************************************************************/
+
+/* SW TCL arrays, using Draw. */
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ int i;
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ if (r300_nothing_to_draw(r300)) {
+ return;
+ }
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ void* buf = pipe_buffer_map(pipe->screen,
+ r300->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ }
+
+ draw_set_mapped_element_buffer(r300->draw, 0, NULL);
+
+ draw_set_mapped_constant_buffer(r300->draw,
+ PIPE_SHADER_VERTEX,
+ r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+ r300->shader_constants[PIPE_SHADER_VERTEX].count *
+ (sizeof(float) * 4));
+
+ draw_arrays(r300->draw, mode, start, count);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ }
+}
+
+/* SW TCL elements, using Draw. */
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ int i;
+ void* indices;
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ if (r300_nothing_to_draw(r300)) {
+ return;
+ }
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ void* buf = pipe_buffer_map(pipe->screen,
+ r300->vertex_buffer[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ }
+
+ indices = pipe_buffer_map(pipe->screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer_range(r300->draw, indexSize,
+ minIndex, maxIndex, indices);
+
+ draw_set_mapped_constant_buffer(r300->draw,
+ PIPE_SHADER_VERTEX,
+ r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+ r300->shader_constants[PIPE_SHADER_VERTEX].count *
+ (sizeof(float) * 4));
+
+ draw_arrays(r300->draw, mode, start, count);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ }
+
+ pipe_buffer_unmap(pipe->screen, indexBuffer);
+ draw_set_mapped_element_buffer_range(r300->draw, 0, start,
+ start + count - 1, NULL);
+}
+
+/* Object for rendering using Draw. */
struct r300_render {
/* Parent class */
struct vbuf_render base;
@@ -67,7 +437,7 @@ r300_render_get_vertex_info(struct vbuf_render* render)
r300_update_derived_state(r300);
- return &r300->vertex_info.vinfo;
+ return &r300->vertex_info->vinfo;
}
static boolean r300_render_allocate_vertices(struct vbuf_render* render,
@@ -105,7 +475,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
PIPE_BUFFER_USAGE_CPU_WRITE);
- return (r300render->vbo_ptr + r300render->vbo_offset);
+ return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
@@ -127,7 +497,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
static void r300_render_release_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
- struct r300_context* r300 = r300render->r300;
r300render->vbo_offset += r300render->vbo_max_used;
r300render->vbo_max_used = 0;
@@ -137,56 +506,13 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
unsigned prim)
{
struct r300_render* r300render = r300_render(render);
- r300render->prim = prim;
- switch (prim) {
- case PIPE_PRIM_POINTS:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS;
- break;
- case PIPE_PRIM_LINES:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES;
- break;
- case PIPE_PRIM_LINE_LOOP:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
- break;
- case PIPE_PRIM_LINE_STRIP:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
- break;
- case PIPE_PRIM_TRIANGLES:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES;
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
- break;
- case PIPE_PRIM_QUADS:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
- break;
- case PIPE_PRIM_POLYGON:
- r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON;
- break;
- default:
- return FALSE;
- break;
- }
+ r300render->prim = prim;
+ r300render->hwprim = r300_translate_primitive(prim);
return TRUE;
}
-static void r300_prepare_render(struct r300_render* render, unsigned count)
-{
- struct r300_context* r300 = render->r300;
-
- CS_LOCALS(r300);
-
- r300_emit_dirty_state(r300);
-}
-
static void r300_render_draw_arrays(struct vbuf_render* render,
unsigned start,
unsigned count)
@@ -196,7 +522,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
CS_LOCALS(r300);
- r300_prepare_render(r300render, count);
+ r300_emit_dirty_state(r300);
DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
@@ -213,15 +539,11 @@ static void r300_render_draw(struct vbuf_render* render,
{
struct r300_render* r300render = r300_render(render);
struct r300_context* r300 = r300render->r300;
- struct pipe_screen* screen = r300->context.screen;
- struct pipe_buffer* index_buffer;
- void* index_map;
int i;
- uint32_t index;
CS_LOCALS(r300);
- r300_prepare_render(r300render, count);
+ r300_emit_dirty_state(r300);
BEGIN_CS(2 + (count+1)/2);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
new file mode 100644
index 00000000000..27b5e6a9630
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_RENDER_H
+#define R300_RENDER_H
+
+uint32_t r300_translate_primitive(unsigned prim);
+
+void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count);
+
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_buffer* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count);
+
+#endif /* R300_RENDER_H */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 81d01b1320c..287664b1d20 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -20,7 +20,16 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "pipe/p_inlines.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_simple_screen.h"
+
+#include "r300_context.h"
#include "r300_screen.h"
+#include "r300_texture.h"
+
+#include "radeon_winsys.h"
/* Return the identifier behind whom the brave coders responsible for this
* amalgamation of code, sweat, and duct tape, routinely obscure their names.
@@ -73,14 +82,14 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
struct r300_screen* r300screen = r300_screen(pscreen);
switch (param) {
- /* XXX cases marked "IN THEORY" are possible on the hardware,
- * but haven't been implemented yet. */
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
/* XXX I'm told this goes up to 16 */
return 8;
case PIPE_CAP_NPOT_TEXTURES:
- /* IN THEORY */
- return 0;
+ /* XXX enable now to get GL2.1 API,
+ * figure out later how to emulate this */
+ return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
if (r300screen->caps->is_r500) {
return 1;
@@ -88,16 +97,26 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
return 0;
}
case PIPE_CAP_GLSL:
- if (r300screen->caps->is_r500) {
- return 1;
- } else {
- return 0;
- }
+ /* I'll be frank. This is a lie.
+ *
+ * We don't truly support GLSL on any of this driver's chipsets.
+ * To be fair, no chipset supports the full GLSL specification
+ * to the best of our knowledge, but some of the less esoteric
+ * features are still missing here.
+ *
+ * Rather than cripple ourselves intentionally, I'm going to set
+ * this flag, and as Gallium's interface continues to change, I
+ * hope that this single monolithic GLSL enable can slowly get
+ * split down into many different pieces and the state tracker
+ * will handle fallbacks transparently, like it should.
+ *
+ * ~ C.
+ */
+ return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
- /* IN THEORY */
- return 0;
+ return 1;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 4;
case PIPE_CAP_OCCLUSION_QUERY:
@@ -105,32 +124,13 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_TEXTURE_SHADOW_MAP:
return 1;
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- if (r300screen->caps->is_r500) {
- /* 13 == 4096x4096 */
- return 13;
- } else {
- /* 12 == 2048x2048 */
- return 12;
- }
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- /* So, technically, the limit is the same as above, but some math
- * shows why this is silly. Assuming RGBA, 4cpp, we can see that
- * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly
- * practical. However, if at some point a game really wants this,
- * then we can remove or raise this limit. */
- if (r300screen->caps->is_r500) {
- /* 9 == 256x256x256 */
- return 9;
- } else {
- /* 8 == 128*128*128 */
- return 8;
- }
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
if (r300screen->caps->is_r500) {
- /* 13 == 4096x4096 */
+ /* 13 == 4096 */
return 13;
} else {
- /* 12 == 2048x2048 */
+ /* 12 == 2048 */
return 12;
}
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -138,13 +138,17 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
return 1;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- /* XXX guessing (what a terrible guess) */
- return 2;
+ return 0;
case PIPE_CAP_TGSI_CONT_SUPPORTED:
- /* XXX */
return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
+ case PIPE_CAP_SM3:
+ if (r300screen->caps->is_r500) {
+ return 1;
+ } else {
+ return 0;
+ }
default:
debug_printf("r300: Implementation error: Bad param %d\n",
param);
@@ -179,19 +183,22 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param)
}
}
-static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
- boolean is_r500)
+static boolean check_tex_format(enum pipe_format format, uint32_t usage,
+ boolean is_r500)
{
+ uint32_t retval = 0;
+
switch (format) {
/* Supported formats. */
/* Colorbuffer */
case PIPE_FORMAT_A4R4G4B4_UNORM:
case PIPE_FORMAT_R5G6B5_UNORM:
case PIPE_FORMAT_A1R5G5B5_UNORM:
- return usage &
+ retval = usage &
(PIPE_TEXTURE_USAGE_RENDER_TARGET |
PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
PIPE_TEXTURE_USAGE_PRIMARY);
+ break;
/* Texture */
case PIPE_FORMAT_A8R8G8B8_SRGB:
@@ -201,7 +208,10 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
case PIPE_FORMAT_YCBCR:
- return usage & PIPE_TEXTURE_USAGE_SAMPLER;
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_A8L8_UNORM:
+ retval = usage & PIPE_TEXTURE_USAGE_SAMPLER;
+ break;
/* Colorbuffer or texture */
case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -209,25 +219,32 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_I8_UNORM:
- return usage &
+ retval = usage &
(PIPE_TEXTURE_USAGE_RENDER_TARGET |
PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
PIPE_TEXTURE_USAGE_PRIMARY |
PIPE_TEXTURE_USAGE_SAMPLER);
+ break;
- /* Z buffer */
+ /* Z buffer or texture */
case PIPE_FORMAT_Z16_UNORM:
- return usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL;
+ retval = usage &
+ (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+ PIPE_TEXTURE_USAGE_SAMPLER);
+ break;
+ /* 24bit Z buffer can only be used as a texture on R500. */
+ case PIPE_FORMAT_Z24X8_UNORM:
/* Z buffer with stencil or texture */
case PIPE_FORMAT_Z24S8_UNORM:
- return usage &
+ retval = usage &
(PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
- PIPE_TEXTURE_USAGE_SAMPLER);
+ (is_r500 ? PIPE_TEXTURE_USAGE_SAMPLER : 0));
+ break;
/* Definitely unsupported formats. */
/* Non-usable Z buffer/stencil formats. */
- case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z32_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
debug_printf("r300: Note: Got unsupported format: %s in %s\n",
@@ -237,7 +254,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
/* XXX These don't even exist
case PIPE_FORMAT_A32R32G32B32:
case PIPE_FORMAT_A16R16G16B16: */
- /* XXX Insert YUV422 packed VYUY and YVYU here */
/* XXX What the deuce is UV88? (r3xx accel page 14)
debug_printf("r300: Warning: Got unimplemented format: %s in %s\n",
pf_name(format), __FUNCTION__);
@@ -261,10 +277,15 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
break;
}
- return FALSE;
+ /* If usage was a mask that contained multiple bits, and not all of them
+ * are supported, this will catch that and return FALSE.
+ * e.g. usage = 2 | 4; retval = 4; (retval >= usage) == FALSE
+ *
+ * This also returns FALSE for any unknown formats.
+ */
+ return (retval >= usage);
}
-/* XXX moar targets */
static boolean r300_is_format_supported(struct pipe_screen* pscreen,
enum pipe_format format,
enum pipe_texture_target target,
@@ -272,15 +293,13 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen,
unsigned geom_flags)
{
switch (target) {
+ case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */
case PIPE_TEXTURE_2D:
- return check_tex_2d_format(format, tex_usage,
- r300_screen(pscreen)->caps->is_r500);
- case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_CUBE:
- debug_printf("r300: Implementation error: Unsupported format "
- "target: %d\n", target);
- break;
+ return check_tex_format(format, tex_usage,
+ r300_screen(pscreen)->caps->is_r500);
+
default:
debug_printf("r300: Fatal: This is not a format target: %d\n",
target);
@@ -300,36 +319,25 @@ r300_get_tex_transfer(struct pipe_screen *screen,
{
struct r300_texture *tex = (struct r300_texture *)texture;
struct r300_transfer *trans;
- unsigned offset; /* in bytes */
+ unsigned offset;
- /* XXX Add support for these things */
- if (texture->target == PIPE_TEXTURE_CUBE) {
- debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n");
- /* offset = tex->image_offset[level][face]; */
- }
- else if (texture->target == PIPE_TEXTURE_3D) {
- debug_printf("PIPE_TEXTURE_3D is not yet supported.\n");
- /* offset = tex->image_offset[level][zslice]; */
- }
- else {
- offset = tex->offset[level];
- assert(face == 0);
- assert(zslice == 0);
- }
+ offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */
trans = CALLOC_STRUCT(r300_transfer);
if (trans) {
pipe_texture_reference(&trans->transfer.texture, texture);
- trans->transfer.format = texture->format;
trans->transfer.x = x;
trans->transfer.y = y;
trans->transfer.width = w;
trans->transfer.height = h;
- trans->transfer.block = texture->block;
- trans->transfer.nblocksx = texture->nblocksx[level];
- trans->transfer.nblocksy = texture->nblocksy[level];
trans->transfer.stride = r300_texture_get_stride(tex, level);
trans->transfer.usage = usage;
+
+ /* XXX not sure whether it's required to set these two,
+ the driver doesn't use them */
+ trans->transfer.zslice = zslice;
+ trans->transfer.face = face;
+
trans->offset = offset;
}
return &trans->transfer;
@@ -347,6 +355,7 @@ static void* r300_transfer_map(struct pipe_screen* screen,
{
struct r300_texture* tex = (struct r300_texture*)transfer->texture;
char* map;
+ enum pipe_format format = tex->tex.format;
map = pipe_buffer_map(screen, tex->buffer,
pipe_transfer_buffer_flags(transfer));
@@ -356,8 +365,8 @@ static void* r300_transfer_map(struct pipe_screen* screen,
}
return map + r300_transfer(transfer)->offset +
- transfer->y / transfer->block.height * transfer->stride +
- transfer->x / transfer->block.width * transfer->block.size;
+ transfer->y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
static void r300_transfer_unmap(struct pipe_screen* screen,
@@ -375,7 +384,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
FREE(r300screen);
}
-struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
+struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys)
{
struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen);
struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities);
@@ -383,13 +392,14 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
if (!r300screen || !caps)
return NULL;
- caps->pci_id = r300_winsys->pci_id;
- caps->num_frag_pipes = r300_winsys->gb_pipes;
+ caps->pci_id = radeon_winsys->pci_id;
+ caps->num_frag_pipes = radeon_winsys->gb_pipes;
+ caps->num_z_pipes = radeon_winsys->z_pipes;
r300_parse_chipset(caps);
r300screen->caps = caps;
- r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys;
+ r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys;
r300screen->screen.destroy = r300_destroy_screen;
r300screen->screen.get_name = r300_get_name;
r300screen->screen.get_vendor = r300_get_vendor;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 2a0e41fbc3b..2217988addd 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -23,14 +23,11 @@
#ifndef R300_SCREEN_H
#define R300_SCREEN_H
-#include "pipe/p_inlines.h"
#include "pipe/p_screen.h"
-#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
#include "r300_chipset.h"
-#include "r300_texture.h"
-#include "r300_winsys.h"
+
+struct radeon_winsys;
struct r300_screen {
/* Parent class */
@@ -61,6 +58,6 @@ r300_transfer(struct pipe_transfer* transfer)
}
/* Creates a new r300 screen. */
-struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys);
+struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys);
#endif /* R300_SCREEN_H */
diff --git a/src/gallium/drivers/r300/r300_shader_inlines.h b/src/gallium/drivers/r300/r300_shader_inlines.h
deleted file mode 100644
index a04f45b03e2..00000000000
--- a/src/gallium/drivers/r300/r300_shader_inlines.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_SHADER_INLINES_H
-#define R300_SHADER_INLINES_H
-
-/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're
- * not using enough of it. */
-static const struct tgsi_full_src_register r300_constant_zero = {
- .SrcRegister.Extended = TRUE,
- .SrcRegister.File = TGSI_FILE_NULL,
- .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO,
- .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO,
- .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO,
- .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO,
-};
-
-static const struct tgsi_full_src_register r300_constant_one = {
- .SrcRegister.Extended = TRUE,
- .SrcRegister.File = TGSI_FILE_NULL,
- .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE,
- .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE,
- .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE,
- .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE,
-};
-
-#endif /* R300_SHADER_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
new file mode 100644
index 00000000000..6796841b29b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SHADER_SEMANTICS_H
+#define R300_SHADER_SEMANTICS_H
+
+#define ATTR_UNUSED (-1)
+#define ATTR_COLOR_COUNT 2
+#define ATTR_GENERIC_COUNT 16
+
+/* This structure contains information about what attributes are written by VS
+ * or read by FS. (but not both) It's much easier to work with than
+ * tgsi_shader_info.
+ *
+ * The variables contain indices to tgsi_shader_info semantics and those
+ * indices are nothing else than input/output register numbers. */
+struct r300_shader_semantics {
+ int pos;
+ int psize;
+ int color[ATTR_COLOR_COUNT];
+ int bcolor[ATTR_COLOR_COUNT];
+ int generic[ATTR_GENERIC_COUNT];
+ int fog;
+ int wpos;
+};
+
+static INLINE void r300_shader_semantics_reset(
+ struct r300_shader_semantics* info)
+{
+ int i;
+
+ info->pos = ATTR_UNUSED;
+ info->psize = ATTR_UNUSED;
+ info->fog = ATTR_UNUSED;
+ info->wpos = ATTR_UNUSED;
+
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ info->color[i] = ATTR_UNUSED;
+ info->bcolor[i] = ATTR_UNUSED;
+ }
+
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ info->generic[i] = ATTR_UNUSED;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 88cb9af6fb7..a145a7f18a5 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,8 +21,10 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "util/u_debug.h"
+#include "draw/draw_context.h"
+
#include "util/u_math.h"
+#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "tgsi/tgsi_parse.h"
@@ -31,6 +34,7 @@
#include "r300_context.h"
#include "r300_reg.h"
+#include "r300_screen.h"
#include "r300_state_inlines.h"
#include "r300_fs.h"
#include "r300_vs.h"
@@ -38,6 +42,120 @@
/* r300_state: Functions used to intialize state context by translating
* Gallium state objects into semi-native r300 state objects. */
+static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 0, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 1, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
/* Create a new blend state based on the CSO blend state.
*
* This encompasses alpha blending, logic/raster ops, and blend dithering. */
@@ -46,23 +164,123 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
{
struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
- if (state->blend_enable) {
- /* XXX for now, always do separate alpha...
- * is it faster to do it with one reg? */
+ if (state->blend_enable)
+ {
+ unsigned eqRGB = state->rgb_func;
+ unsigned srcRGB = state->rgb_src_factor;
+ unsigned dstRGB = state->rgb_dst_factor;
+
+ unsigned eqA = state->alpha_func;
+ unsigned srcA = state->alpha_src_factor;
+ unsigned dstA = state->alpha_dst_factor;
+
+ /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
+ * this is just the crappy D3D naming */
blend->blend_control = R300_ALPHA_BLEND_ENABLE |
- R300_SEPARATE_ALPHA_ENABLE |
- R300_READ_ENABLE |
- r300_translate_blend_function(state->rgb_func) |
- (r300_translate_blend_factor(state->rgb_src_factor) <<
- R300_SRC_BLEND_SHIFT) |
- (r300_translate_blend_factor(state->rgb_dst_factor) <<
- R300_DST_BLEND_SHIFT);
- blend->alpha_blend_control =
- r300_translate_blend_function(state->alpha_func) |
- (r300_translate_blend_factor(state->alpha_src_factor) <<
- R300_SRC_BLEND_SHIFT) |
- (r300_translate_blend_factor(state->alpha_dst_factor) <<
- R300_DST_BLEND_SHIFT);
+ r300_translate_blend_function(eqRGB) |
+ ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
+ ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
+
+ /* Optimization: some operations do not require the destination color.
+ *
+ * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
+ * otherwise blending gives incorrect results. It seems to be
+ * a hardware bug. */
+ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
+ eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
+ dstRGB != PIPE_BLENDFACTOR_ZERO ||
+ dstA != PIPE_BLENDFACTOR_ZERO ||
+ srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_DST_COLOR ||
+ srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
+ /* Enable reading from the colorbuffer. */
+ blend->blend_control |= R300_READ_ENABLE;
+
+ if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) {
+ /* Optimization: Depending on incoming pixels, we can
+ * conditionally disable the reading in hardware... */
+ if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
+ eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
+ /* Disable reading if SRC_ALPHA == 0. */
+ if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend->blend_control |= R500_SRC_ALPHA_0_NO_READ;
+ }
+
+ /* Disable reading if SRC_ALPHA == 1. */
+ if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend->blend_control |= R500_SRC_ALPHA_1_NO_READ;
+ }
+ }
+ }
+ }
+
+ /* Optimization: discard pixels which don't change the colorbuffer.
+ *
+ * The code below is non-trivial and some math is involved.
+ *
+ * Discarding pixels must be disabled when FP16 AA is enabled.
+ * This is a hardware bug. Also, this implementation wouldn't work
+ * with FP blending enabled and equation clamping disabled.
+ *
+ * Equations other than ADD are rarely used and therefore won't be
+ * optimized. */
+ if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
+ (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
+ /* ADD: X+Y
+ * REVERSE_SUBTRACT: Y-X
+ *
+ * The idea is:
+ * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
+ * then CB will not be changed.
+ *
+ * Given the srcFactor and dstFactor variables, we can derive
+ * what src and dst should be equal to and discard appropriate
+ * pixels.
+ */
+ if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+ } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
+ } else if (blend_discard_if_src_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
+ } else if (blend_discard_if_src_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
+ } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
+ } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend->blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
+ }
+ }
+
+ /* separate alpha */
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+ blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE;
+ blend->alpha_blend_control =
+ r300_translate_blend_function(eqA) |
+ (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
+ (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
+ }
}
/* PIPE_LOGICOP_* don't need to be translated, fortunately. */
@@ -71,6 +289,20 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
}
+ /* Color Channel Mask */
+ if (state->colormask & PIPE_MASK_R) {
+ blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0;
+ }
+ if (state->colormask & PIPE_MASK_G) {
+ blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0;
+ }
+ if (state->colormask & PIPE_MASK_B) {
+ blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0;
+ }
+ if (state->colormask & PIPE_MASK_A) {
+ blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0;
+ }
+
if (state->dither) {
blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
@@ -96,25 +328,30 @@ static void r300_delete_blend_state(struct pipe_context* pipe,
FREE(state);
}
+/* Convert float to 10bit integer */
+static unsigned float_to_fixed10(float f)
+{
+ return CLAMP((unsigned)(f * 1023.9f), 0, 1023);
+}
+
/* Set blend color.
* Setup both R300 and R500 registers, figure out later which one to write. */
static void r300_set_blend_color(struct pipe_context* pipe,
const struct pipe_blend_color* color)
{
struct r300_context* r300 = r300_context(pipe);
- ubyte ur, ug, ub, ua;
-
- ur = float_to_ubyte(color->color[0]);
- ug = float_to_ubyte(color->color[1]);
- ub = float_to_ubyte(color->color[2]);
- ua = float_to_ubyte(color->color[3]);
+ union util_color uc;
- util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM,
- &r300->blend_color_state->blend_color);
+ util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
+ r300->blend_color_state->blend_color = uc.ui;
- /* XXX this is wrong */
- r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16);
- r300->blend_color_state->blend_color_green_blue = ub | (ug << 16);
+ /* XXX if FP16 blending is enabled, we should use the FP16 format */
+ r300->blend_color_state->blend_color_red_alpha =
+ float_to_fixed10(color->color[0]) |
+ (float_to_fixed10(color->color[3]) << 16);
+ r300->blend_color_state->blend_color_green_blue =
+ float_to_fixed10(color->color[2]) |
+ (float_to_fixed10(color->color[1]) << 16);
r300->dirty_state |= R300_NEW_BLEND_COLOR;
}
@@ -133,31 +370,6 @@ static void r300_set_clip_state(struct pipe_context* pipe,
}
}
-static void
- r300_set_constant_buffer(struct pipe_context* pipe,
- uint shader, uint index,
- const struct pipe_constant_buffer* buffer)
-{
- struct r300_context* r300 = r300_context(pipe);
-
- /* This entire chunk of code seems ever-so-slightly baked.
- * It's as if I've got pipe_buffer* matryoshkas... */
- if (buffer && buffer->buffer && buffer->buffer->size) {
- void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- memcpy(r300->shader_constants[shader].constants,
- map, buffer->buffer->size);
- pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer);
-
- r300->shader_constants[shader].count =
- buffer->buffer->size / (sizeof(float) * 4);
- } else {
- r300->shader_constants[shader].count = 0;
- }
-
- r300->dirty_state |= R300_NEW_CONSTANTS;
-}
-
/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
*
* This contains the depth buffer, stencil buffer, alpha test, and such.
@@ -167,6 +379,8 @@ static void*
r300_create_dsa_state(struct pipe_context* pipe,
const struct pipe_depth_stencil_alpha_state* state)
{
+ struct r300_capabilities *caps =
+ r300_screen(r300_context(pipe)->context.screen)->caps;
struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
/* Depth test setup. */
@@ -211,9 +425,16 @@ static void*
(r300_translate_stencil_op(state->stencil[1].zfail_op) <<
R300_S_BACK_ZFAIL_OP_SHIFT);
- dsa->stencil_ref_bf = (state->stencil[1].ref_value) |
- (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
- (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+ /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
+ if (caps->is_r500)
+ {
+ dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
+ dsa->stencil_ref_bf = (state->stencil[1].ref_value) |
+ (state->stencil[1].valuemask <<
+ R300_STENCILMASK_SHIFT) |
+ (state->stencil[1].writemask <<
+ R300_STENCILWRITEMASK_SHIFT);
+ }
}
}
@@ -222,11 +443,13 @@ static void*
dsa->alpha_function =
r300_translate_alpha_function(state->alpha.func) |
R300_FG_ALPHA_FUNC_ENABLE;
- dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f,
- 0, 1023);
- } else {
- /* XXX need to fix this to be dynamically set
- dsa->z_buffer_top = R300_ZTOP_ENABLE; */
+
+ /* XXX figure out why emitting 10bit alpha ref causes CS to dump */
+ /* always use 8bit alpha ref */
+ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
+
+ if (caps->is_r500)
+ dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
}
return (void*)dsa;
@@ -249,11 +472,29 @@ static void r300_delete_dsa_state(struct pipe_context* pipe,
FREE(state);
}
-static void r300_set_edgeflags(struct pipe_context* pipe,
- const unsigned* bitfield)
+static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
+ struct r300_scissor_regs *scissor,
+ boolean is_r500)
{
- /* XXX you know it's bad when i915 has this blank too */
- /* XXX and even worse, I have no idea WTF the bitfield is */
+ if (is_r500) {
+ scissor->top_left =
+ (state->minx << R300_SCISSORS_X_SHIFT) |
+ (state->miny << R300_SCISSORS_Y_SHIFT);
+ scissor->bottom_right =
+ ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
+ ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
+ } else {
+ /* Offset of 1440 in non-R500 chipsets. */
+ scissor->top_left =
+ ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
+ ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
+ scissor->bottom_right =
+ (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
+ (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
+ }
+
+ scissor->empty_area = state->minx >= state->maxx ||
+ state->miny >= state->maxy;
}
static void
@@ -261,12 +502,27 @@ static void
const struct pipe_framebuffer_state* state)
{
struct r300_context* r300 = r300_context(pipe);
+ struct pipe_scissor_state scissor;
- draw_flush(r300->draw);
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ }
r300->framebuffer_state = *state;
+ scissor.minx = scissor.miny = 0;
+ scissor.maxx = state->width;
+ scissor.maxy = state->height;
+ r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer,
+ r300_screen(r300->context.screen)->caps->is_r500);
+
+ /* Don't rely on the order of states being set for the first time. */
+ if (!r300->rs_state || !r300->rs_state->rs.scissor) {
+ r300->dirty_state |= R300_NEW_SCISSOR;
+ }
r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+ r300->dirty_state |= R300_NEW_BLEND;
+ r300->dirty_state |= R300_NEW_DSA;
}
/* Create fragment shader state. */
@@ -282,6 +538,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
fs->state.tokens = tgsi_dup_tokens(shader->tokens);
tgsi_scan_shader(shader->tokens, &fs->info);
+ r300_shader_read_fs_inputs(&fs->info, &fs->inputs);
return (void*)fs;
}
@@ -295,21 +552,31 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
if (fs == NULL) {
r300->fs = NULL;
return;
- } else if (!fs->translated) {
- r300_translate_fragment_shader(r300, fs);
}
r300->fs = fs;
+ r300_pick_fragment_shader(r300);
- r300->dirty_state |= R300_NEW_FRAGMENT_SHADER;
+ if (r300->vs && r300_vertex_shader_setup_wpos(r300)) {
+ r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+ }
+
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
}
/* Delete fragment shader state. */
static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
{
struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
- rc_constants_destroy(&fs->code.constants);
- FREE(fs->state.tokens);
+ struct r300_fragment_shader_code *tmp, *ptr = fs->first;
+
+ while (ptr) {
+ tmp = ptr;
+ ptr = ptr->next;
+ rc_constants_destroy(&tmp->code.constants);
+ FREE(tmp);
+ }
+ FREE((void*)fs->state.tokens);
FREE(shader);
}
@@ -347,8 +614,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
if (state->bypass_vs_clip_and_viewport ||
!r300_screen(pipe->screen)->caps->has_tcl) {
rs->vap_control_status |= R300_VAP_TCL_BYPASS;
- } else {
- rs->rs.bypass_vs_clip_and_viewport = TRUE;
}
rs->point_size = pack_float_16_6x(state->point_size) |
@@ -363,25 +628,52 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->line_control = pack_float_16_6x(state->line_width) |
R300_GA_LINE_CNTL_END_TYPE_COMP;
+ /* XXX I think there is something wrong with the polygon mode,
+ * XXX re-test when r300g is in a better shape */
+
+ /* Enable polygon mode */
+ if (state->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ state->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ rs->polygon_mode = R300_GA_POLY_MODE_DUAL;
+ }
+
/* Radeons don't think in "CW/CCW", they think in "front/back". */
if (state->front_winding == PIPE_WINDING_CW) {
rs->cull_mode = R300_FRONT_FACE_CW;
+ /* Polygon offset */
if (state->offset_cw) {
rs->polygon_offset_enable |= R300_FRONT_ENABLE;
}
if (state->offset_ccw) {
rs->polygon_offset_enable |= R300_BACK_ENABLE;
}
+
+ /* Polygon mode */
+ if (rs->polygon_mode) {
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_front(state->fill_cw);
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_back(state->fill_ccw);
+ }
} else {
rs->cull_mode = R300_FRONT_FACE_CCW;
+ /* Polygon offset */
if (state->offset_ccw) {
rs->polygon_offset_enable |= R300_FRONT_ENABLE;
}
if (state->offset_cw) {
rs->polygon_offset_enable |= R300_BACK_ENABLE;
}
+
+ /* Polygon mode */
+ if (rs->polygon_mode) {
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_front(state->fill_ccw);
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_back(state->fill_cw);
+ }
}
if (state->front_winding & state->cull_mode) {
rs->cull_mode |= R300_CULL_FRONT;
@@ -412,10 +704,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
rs->color_control = R300_SHADE_MODEL_SMOOTH;
}
- if (!state->flatshade_first) {
- rs->color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
- }
-
return (void*)rs;
}
@@ -425,14 +713,20 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
struct r300_context* r300 = r300_context(pipe);
struct r300_rs_state* rs = (struct r300_rs_state*)state;
- draw_flush(r300->draw);
- draw_set_rasterizer_state(r300->draw, &rs->rs);
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ draw_set_rasterizer_state(r300->draw, &rs->rs);
+ }
r300->rs_state = rs;
+ /* XXX Clean these up when we move to atom emits */
r300->dirty_state |= R300_NEW_RASTERIZER;
r300->dirty_state |= R300_NEW_RS_BLOCK;
r300->dirty_state |= R300_NEW_SCISSOR;
r300->dirty_state |= R300_NEW_VIEWPORT;
+ if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
}
/* Free rasterizer state. */
@@ -448,6 +742,9 @@ static void*
struct r300_context* r300 = r300_context(pipe);
struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
int lod_bias;
+ union util_color uc;
+
+ sampler->state = *state;
sampler->filter0 |=
(r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) |
@@ -456,7 +753,13 @@ static void*
sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
state->mag_img_filter,
- state->min_mip_filter);
+ state->min_mip_filter,
+ state->max_anisotropy > 1.0);
+
+ /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
+ /* We must pass these to the emit function to clamp them properly. */
+ sampler->min_lod = MAX2((unsigned)state->min_lod, 0);
+ sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0);
lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1);
@@ -464,8 +767,8 @@ static void*
sampler->filter1 |= r300_anisotropy(state->max_anisotropy);
- util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM,
- &sampler->border_color);
+ util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
+ sampler->border_color = uc.ui;
/* R500-specific fixups and optimizations */
if (r300_screen(r300->context.screen)->caps->is_r500) {
@@ -494,6 +797,20 @@ static void r300_bind_sampler_states(struct pipe_context* pipe,
}
r300->sampler_count = count;
+
+ /* Pick a fragment shader based on the texture compare state. */
+ if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) {
+ if (r300_pick_fragment_shader(r300)) {
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER |
+ R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
+ }
+}
+
+static void r300_lacks_vertex_textures(struct pipe_context* pipe,
+ unsigned count,
+ void** states)
+{
}
static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
@@ -506,18 +823,26 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
struct pipe_texture** texture)
{
struct r300_context* r300 = r300_context(pipe);
+ boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
int i;
/* XXX magic num */
if (count > 8) {
return;
}
-
+
for (i = 0; i < count; i++) {
if (r300->textures[i] != (struct r300_texture*)texture[i]) {
pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
texture[i]);
r300->dirty_state |= (R300_NEW_TEXTURE << i);
+
+ /* R300-specific - set the texrect factor in a fragment shader */
+ if (!is_r500 && r300->textures[i]->is_npot) {
+ /* XXX It would be nice to re-emit just 1 constant,
+ * XXX not all of them */
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
}
}
@@ -537,24 +862,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- if (r300_screen(r300->context.screen)->caps->is_r500) {
- r300->scissor_state->scissor_top_left =
- (state->minx << R300_SCISSORS_X_SHIFT) |
- (state->miny << R300_SCISSORS_Y_SHIFT);
- r300->scissor_state->scissor_bottom_right =
- ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
- ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
- } else {
- /* Offset of 1440 in non-R500 chipsets. */
- r300->scissor_state->scissor_top_left =
- ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
- ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
- r300->scissor_state->scissor_bottom_right =
- (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
- (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
- }
+ r300_set_scissor_regs(state, &r300->scissor_state->scissor,
+ r300_screen(r300->context.screen)->caps->is_r500);
- r300->dirty_state |= R300_NEW_SCISSOR;
+ /* Don't rely on the order of states being set for the first time. */
+ if (!r300->rs_state || r300->rs_state->rs.scissor) {
+ r300->dirty_state |= R300_NEW_SCISSOR;
+ }
}
static void r300_set_viewport_state(struct pipe_context* pipe,
@@ -566,17 +880,14 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
r300->viewport_state->vte_control = R300_VTX_W0_FMT;
if (state->scale[0] != 1.0f) {
- assert(state->scale[0] != 0.0f);
r300->viewport_state->xscale = state->scale[0];
r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA;
}
if (state->scale[1] != 1.0f) {
- assert(state->scale[1] != 0.0f);
r300->viewport_state->yscale = state->scale[1];
r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA;
}
if (state->scale[2] != 1.0f) {
- assert(state->scale[2] != 0.0f);
r300->viewport_state->zscale = state->scale[2];
r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA;
}
@@ -594,6 +905,9 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
}
r300->dirty_state |= R300_NEW_VIEWPORT;
+ if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+ }
}
static void r300_set_vertex_buffers(struct pipe_context* pipe,
@@ -602,13 +916,16 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- memcpy(r300->vertex_buffers, buffers,
+ memcpy(r300->vertex_buffer, buffers,
sizeof(struct pipe_vertex_buffer) * count);
-
r300->vertex_buffer_count = count;
- draw_flush(r300->draw);
- draw_set_vertex_buffers(r300->draw, count, buffers);
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ draw_set_vertex_buffers(r300->draw, count, buffers);
+ }
+
+ r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
}
static void r300_set_vertex_elements(struct pipe_context* pipe,
@@ -617,8 +934,15 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- draw_flush(r300->draw);
- draw_set_vertex_elements(r300->draw, count, elements);
+ memcpy(r300->vertex_element,
+ elements,
+ sizeof(struct pipe_vertex_element) * count);
+ r300->vertex_element_count = count;
+
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ draw_set_vertex_elements(r300->draw, count, elements);
+ }
}
static void* r300_create_vs_state(struct pipe_context* pipe,
@@ -634,9 +958,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
tgsi_scan_shader(shader->tokens, &vs->info);
- /* Appease Draw. */
- vs->draw = draw_create_vertex_shader(r300->draw, shader);
-
return (void*)vs;
} else {
return draw_create_vertex_shader(r300->draw, shader);
@@ -647,8 +968,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
- draw_flush(r300->draw);
-
if (r300_screen(pipe->screen)->caps->has_tcl) {
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
@@ -659,10 +978,16 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300_translate_vertex_shader(r300, vs);
}
- draw_bind_vertex_shader(r300->draw, vs->draw);
r300->vs = vs;
- r300->dirty_state |= R300_NEW_VERTEX_SHADER;
+ if (r300->fs) {
+ r300_vertex_shader_setup_wpos(r300);
+ }
+
+ r300->dirty_state |=
+ R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS |
+ R300_NEW_VERTEX_FORMAT;
} else {
+ draw_flush(r300->draw);
draw_bind_vertex_shader(r300->draw,
(struct draw_vertex_shader*)shader);
}
@@ -676,8 +1001,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
rc_constants_destroy(&vs->code.constants);
- draw_delete_vertex_shader(r300->draw, vs->draw);
- FREE(vs->state.tokens);
+ FREE((void*)vs->state.tokens);
FREE(shader);
} else {
draw_delete_vertex_shader(r300->draw,
@@ -685,6 +1009,31 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
}
}
+static void r300_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ void *mapped;
+
+ if (buf == NULL || buf->buffer->size == 0 ||
+ (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
+ {
+ r300->shader_constants[shader].count = 0;
+ return;
+ }
+
+ assert((buf->buffer->size % 4 * sizeof(float)) == 0);
+ memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size);
+ r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float));
+ pipe_buffer_unmap(pipe->screen, buf->buffer);
+
+ if (shader == PIPE_SHADER_VERTEX)
+ r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
+ else if (shader == PIPE_SHADER_FRAGMENT)
+ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+}
+
void r300_init_state_functions(struct r300_context* r300)
{
r300->context.create_blend_state = r300_create_blend_state;
@@ -701,8 +1050,6 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
- r300->context.set_edgeflags = r300_set_edgeflags;
-
r300->context.set_framebuffer_state = r300_set_framebuffer_state;
r300->context.create_fs_state = r300_create_fs_state;
@@ -716,10 +1063,11 @@ void r300_init_state_functions(struct r300_context* r300)
r300->context.delete_rasterizer_state = r300_delete_rs_state;
r300->context.create_sampler_state = r300_create_sampler_state;
- r300->context.bind_sampler_states = r300_bind_sampler_states;
+ r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
+ r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
r300->context.delete_sampler_state = r300_delete_sampler_state;
- r300->context.set_sampler_textures = r300_set_sampler_textures;
+ r300->context.set_fragment_sampler_textures = r300_set_sampler_textures;
r300->context.set_scissor_state = r300_set_scissor_state;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 02b7ab91076..22660a52d9a 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -1,5 +1,6 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,439 +21,540 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "r300_state_derived.h"
+#include "draw/draw_context.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "r300_context.h"
#include "r300_fs.h"
+#include "r300_screen.h"
+#include "r300_shader_semantics.h"
+#include "r300_state_derived.h"
#include "r300_state_inlines.h"
#include "r300_vs.h"
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
-/* Set up the vs_tab and routes. */
-static void r300_vs_tab_routes(struct r300_context* r300,
- struct r300_vertex_format* vformat)
+struct r300_shader_key {
+ struct r300_vertex_shader* vs;
+ struct r300_fragment_shader* fs;
+};
+
+struct r300_shader_derived_value {
+ struct r300_vertex_format* vformat;
+ struct r300_rs_block* rs_block;
+};
+
+unsigned r300_shader_key_hash(void* key) {
+ struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
+ unsigned vs = (intptr_t)shader_key->vs;
+ unsigned fs = (intptr_t)shader_key->fs;
+
+ return (vs << 16) | (fs & 0xffff);
+}
+
+int r300_shader_key_compare(void* key1, void* key2) {
+ struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1;
+ struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2;
+
+ return (shader_key1->vs == shader_key2->vs) &&
+ (shader_key1->fs == shader_key2->fs);
+}
+
+static void r300_draw_emit_attrib(struct r300_context* r300,
+ enum attrib_emit emit,
+ enum interp_mode interp,
+ int index)
{
- struct r300_screen* r300screen = r300_screen(r300->context.screen);
- struct vertex_info* vinfo = &vformat->vinfo;
- int* tab = vformat->vs_tab;
- boolean pos = FALSE, psize = FALSE, fog = FALSE;
- int i, texs = 0, cols = 0;
- struct tgsi_shader_info* info;
+ struct tgsi_shader_info* info = &r300->vs->info;
+ int output;
- if (r300screen->caps->has_tcl) {
- /* Use vertex shader to determine required routes. */
- info = &r300->vs->info;
+ output = draw_find_shader_output(r300->draw,
+ info->output_semantic_name[index],
+ info->output_semantic_index[index]);
+ draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+ struct r300_shader_semantics* vs_outputs = &r300->vs->outputs;
+ int i, gen_count;
+
+ /* Position. */
+ if (vs_outputs->pos != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->pos);
} else {
- /* Use fragment shader to determine required routes. */
- info = &r300->fs->info;
+ assert(0);
}
- assert(info->num_inputs <= 16);
-
- if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte)
- {
- for (i = 0; i < info->num_inputs; i++) {
- switch (r300->vs->code.inputs[i]) {
- case TGSI_SEMANTIC_POSITION:
- pos = TRUE;
- tab[i] = 0;
- break;
- case TGSI_SEMANTIC_COLOR:
- tab[i] = 2 + cols;
- cols++;
- break;
- case TGSI_SEMANTIC_PSIZE:
- assert(psize == FALSE);
- psize = TRUE;
- tab[i] = 15;
- break;
- case TGSI_SEMANTIC_FOG:
- assert(fog == FALSE);
- fog = TRUE;
- /* Fall through */
- case TGSI_SEMANTIC_GENERIC:
- tab[i] = 6 + texs;
- texs++;
- break;
- default:
- debug_printf("r300: Unknown vertex input %d\n",
- info->input_semantic_name[i]);
- break;
- }
- }
+ /* Point size. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+ vs_outputs->psize);
}
- else
- {
- /* Just copy vert attribs over as-is. */
- for (i = 0; i < info->num_inputs; i++) {
- tab[i] = i;
- }
- for (i = 0; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- pos = TRUE;
- break;
- case TGSI_SEMANTIC_COLOR:
- cols++;
- break;
- case TGSI_SEMANTIC_PSIZE:
- psize = TRUE;
- break;
- case TGSI_SEMANTIC_FOG:
- fog = TRUE;
- /* Fall through */
- case TGSI_SEMANTIC_GENERIC:
- texs++;
- break;
- default:
- debug_printf("r300: Unknown vertex output %d\n",
- info->output_semantic_name[i]);
- break;
- }
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+ vs_outputs->color[i]);
}
}
- /* XXX magic */
- assert(texs <= 8);
+ /* XXX Back-face colors. */
- /* Do the actual vertex_info setup.
- *
- * vertex_info has four uints of hardware-specific data in it.
- * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL
- * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM
- * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
- * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
-
- vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
-
- /* We need to add vertex position attribute only for SW TCL case,
- * for HW TCL case it could be generated by vertex shader */
- if (!pos && !r300screen->caps->has_tcl) {
- debug_printf("r300: Forcing vertex position attribute emit...\n");
- /* Make room for the position attribute
- * at the beginning of the tab. */
- for (i = 15; i > 0; i--) {
- tab[i] = tab[i-1];
+ /* Texture coordinates. */
+ gen_count = 0;
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->generic[i]);
+ gen_count++;
}
- tab[0] = 0;
- }
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
- vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
- vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
-
- if (psize) {
- draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
- vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
}
- for (i = 0; i < cols; i++) {
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
- vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
- vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
+ /* Fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->fog);
+ gen_count++;
}
- /* Init i right here, increment it if fog is enabled.
- * This gets around a double-increment problem. */
- i = 0;
+ /* XXX magic */
+ assert(gen_count <= 8);
+}
- if (fog) {
- i++;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
- vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
- vinfo->hwfmt[3] |= (4 << (3 * i));
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_context* r300)
+{
+ struct r300_vertex_info *vformat = r300->vertex_info;
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i;
+ int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ int* stream_tab;
+
+ /* If TCL is bypassed, map vertex streams to equivalent VS output
+ * locations. */
+ if (r300->rs_state->enable_vte) {
+ stream_tab = identity;
+ } else {
+ stream_tab = r300->vs->stream_loc_notcl;
}
- for (; i < texs; i++) {
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
- vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
- vinfo->hwfmt[3] |= (4 << (3 * i));
- }
+ /* Vertex shaders have no semantics on their inputs,
+ * so PSC should just route stuff based on the vertex elements,
+ * and not on attrib information. */
+ DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
+ " in psc\n",
+ r300->vs->info.num_inputs,
+ r300->vertex_element_count);
- /* Handle the case where the vertex shader will be generating some of
- * the attribs based on its inputs. */
- if (r300screen->caps->has_tcl &&
- info->num_inputs < info->num_outputs) {
- vinfo->num_attribs = info->num_inputs;
+ for (i = 0; i < r300->vertex_element_count; i++) {
+ format = r300->vertex_element[i].src_format;
+
+ type = r300_translate_vertex_data_type(format) |
+ (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
+ swizzle = r300_translate_vertex_data_swizzle(format);
+
+ if (i & 1) {
+ vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vformat->vap_prog_stream_cntl[i >> 1] |= type;
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+ }
}
- draw_compute_vertex_size(vinfo);
+ assert(i <= 15);
+
+ /* Set the last vector in the PSC. */
+ if (i) {
+ i -= 1;
+ }
+ vformat->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
}
-/* Update the PSC tables. */
-static void r300_vertex_psc(struct r300_context* r300,
- struct r300_vertex_format* vformat)
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context* r300)
{
- struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ struct r300_vertex_info *vformat = r300->vertex_info;
struct vertex_info* vinfo = &vformat->vinfo;
- int* tab = vformat->vs_tab;
- uint32_t temp;
- int i, attrib_count;
-
- /* Vertex shaders have no semantics on their inputs,
- * so PSC should just route stuff based on their info,
- * and not on attrib information. */
- if (r300screen->caps->has_tcl) {
- attrib_count = r300->vs->info.num_inputs;
- DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n",
- attrib_count);
- } else {
- attrib_count = vinfo->num_attribs;
- DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
- for (i = 0; i < attrib_count; i++) {
- DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
- " tab %d\n", vinfo->attrib[i].src_index,
- vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
- tab[i]);
- }
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i, attrib_count;
+ int* vs_output_tab = r300->vs->stream_loc_notcl;
+
+ /* For each Draw attribute, route it to the fragment shader according
+ * to the vs_output_tab. */
+ attrib_count = vinfo->num_attribs;
+ DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ for (i = 0; i < attrib_count; i++) {
+ DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+ " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+ vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+ vs_output_tab[i]);
}
for (i = 0; i < attrib_count; i++) {
- /* Make sure we have a proper destination for our attribute */
- assert(tab[i] != -1);
+ /* Make sure we have a proper destination for our attribute. */
+ assert(vs_output_tab[i] != -1);
- /* Add the attribute to the PSC table. */
- temp = r300screen->caps->has_tcl ?
- R300_DATA_TYPE_FLOAT_4 :
- translate_vertex_data_type(vinfo->attrib[i].emit);
- temp |= tab[i] << R300_DST_VEC_LOC_SHIFT;
+ format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
- if (i & 1) {
- vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
- vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16;
+ /* Obtain the type of data in this attribute. */
+ type = r300_translate_vertex_data_type(format) |
+ vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
- vformat->vap_prog_stream_cntl_ext[i >> 1] |=
- (R300_VAP_SWIZZLE_XYZW << 16);
- } else {
- vformat->vap_prog_stream_cntl[i >> 1] &= 0xffff0000;
- vformat->vap_prog_stream_cntl[i >> 1] |= temp << 0;
+ /* Obtain the swizzle for this attribute. Note that the default
+ * swizzle in the hardware is not XYZW! */
+ swizzle = r300_translate_vertex_data_swizzle(format);
- vformat->vap_prog_stream_cntl_ext[i >> 1] |=
- (R300_VAP_SWIZZLE_XYZW << 0);
+ /* Add the attribute to the PSC table. */
+ if (i & 1) {
+ vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vformat->vap_prog_stream_cntl[i >> 1] |= type;
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
}
}
/* Set the last vector in the PSC. */
- i--;
+ if (i) {
+ i -= 1;
+ }
vformat->vap_prog_stream_cntl[i >> 1] |=
(R300_LAST_VEC << (i & 1 ? 16 : 0));
}
-/* Update the vertex format. */
-static void r300_update_vertex_format(struct r300_context* r300)
+static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
+ boolean swizzle_0001)
{
- struct r300_vertex_format vformat;
- int i;
-
- memset(&vformat, 0, sizeof(struct r300_vertex_format));
- for (i = 0; i < 16; i++) {
- vformat.vs_tab[i] = -1;
- vformat.fs_tab[i] = -1;
+ rs->ip[id] |= R300_RS_COL_PTR(ptr);
+ if (swizzle_0001) {
+ rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ } else {
+ rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
}
+ rs->inst[id] |= R300_RS_INST_COL_ID(id);
+}
- r300_vs_tab_routes(r300, &vformat);
-
- r300_vertex_psc(r300, &vformat);
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
+ R300_RS_INST_COL_ADDR(fp_offset);
+}
- if (memcmp(&r300->vertex_info, &vformat,
- sizeof(struct r300_vertex_format))) {
- memcpy(&r300->vertex_info, &vformat,
- sizeof(struct r300_vertex_format));
- r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+ boolean swizzle_X001)
+{
+ if (swizzle_X001) {
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_K0) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) |
+ R300_RS_SEL_Q(R300_RS_SEL_K1);
+ } else {
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_C1) |
+ R300_RS_SEL_R(R300_RS_SEL_C2) |
+ R300_RS_SEL_Q(R300_RS_SEL_C3);
}
+ rs->inst[id] |= R300_RS_INST_TEX_ID(id);
}
-/* Set up the mappings from GB to US, for RS block. */
-static void r300_update_fs_tab(struct r300_context* r300)
+static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
{
- struct r300_vertex_format* vformat = &r300->vertex_info;
- struct tgsi_shader_info* info = &r300->fs->info;
- int i, cols = 0, texs = 0, cols_emitted = 0;
- int* tab = vformat->fs_tab;
+ rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
+ R300_RS_INST_TEX_ADDR(fp_offset);
+}
- for (i = 0; i < 16; i++) {
- tab[i] = -1;
+static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
+ boolean swizzle_0001)
+{
+ rs->ip[id] |= R500_RS_COL_PTR(ptr);
+ if (swizzle_0001) {
+ rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ } else {
+ rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
}
+ rs->inst[id] |= R500_RS_INST_COL_ID(id);
+}
- assert(info->num_inputs <= 16);
- for (i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- tab[i] = INTERP_LINEAR;
- cols++;
- break;
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE:
- debug_printf("r300: Implementation error: Can't use "
- "pos attribs in fragshader yet!\n");
- /* Pass through for now */
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- tab[i] = INTERP_PERSPECTIVE;
- break;
- default:
- debug_printf("r300: Unknown vertex input %d\n",
- info->input_semantic_name[i]);
- break;
- }
- }
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+}
- /* Now that we know where everything is... */
- DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs);
- for (i = 0; i < info->num_inputs; i++) {
- switch (tab[i]) {
- case INTERP_LINEAR:
- DBG(r300, DBG_DRAW, "r300: attrib: "
- "stack offset %d, color, tab %d\n",
- i, cols_emitted);
- tab[i] = cols_emitted;
- cols_emitted++;
- break;
- case INTERP_PERSPECTIVE:
- DBG(r300, DBG_DRAW, "r300: attrib: "
- "stack offset %d, texcoord, tab %d\n",
- i, cols + texs);
- tab[i] = cols + texs;
- texs++;
- break;
- case -1:
- debug_printf("r300: Implementation error: Bad fp interp!\n");
- default:
- break;
- }
+static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+ boolean swizzle_X001)
+{
+ int rs_tex_comp = ptr*4;
+
+ if (swizzle_X001) {
+ rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
+ R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
+ R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
+ } else {
+ rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(rs_tex_comp + 1) |
+ R500_RS_SEL_R(rs_tex_comp + 2) |
+ R500_RS_SEL_Q(rs_tex_comp + 3);
}
+ rs->inst[id] |= R500_RS_INST_TEX_ID(id);
+}
+static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
+ R500_RS_INST_TEX_ADDR(fp_offset);
}
-/* Set up the RS block. This is the part of the chipset that actually does
- * the rasterization of vertices into fragments. This is also the part of the
- * chipset that locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300)
+/* Set up the RS block.
+ *
+ * This is the part of the chipset that actually does the rasterization
+ * of vertices into fragments. This is also the part of the chipset that
+ * locks up if any part of it is even slightly wrong. */
+static void r300_update_rs_block(struct r300_context* r300,
+ struct r300_shader_semantics* vs_outputs,
+ struct r300_shader_semantics* fs_inputs)
{
struct r300_rs_block* rs = r300->rs_block;
- struct tgsi_shader_info* info = &r300->fs->info;
- int* tab = r300->vertex_info.fs_tab;
- int col_count = 0, fp_offset = 0, i, tex_count = 0;
- int rs_tex_comp = 0;
- memset(rs, 0, sizeof(struct r300_rs_block));
+ int i, col_count = 0, tex_count = 0, fp_offset = 0;
+ void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
+ void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+ void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
+ void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
if (r300_screen(r300->context.screen)->caps->is_r500) {
- for (i = 0; i < info->num_inputs; i++) {
- assert(tab[i] != -1);
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- rs->ip[col_count] |=
- R500_RS_COL_PTR(col_count) |
- R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
- col_count++;
- break;
- case TGSI_SEMANTIC_GENERIC:
- rs->ip[tex_count] |=
- R500_RS_SEL_S(rs_tex_comp) |
- R500_RS_SEL_T(rs_tex_comp + 1) |
- R500_RS_SEL_R(rs_tex_comp + 2) |
- R500_RS_SEL_Q(rs_tex_comp + 3);
- tex_count++;
- rs_tex_comp += 4;
- break;
- default:
- break;
- }
- }
+ rX00_rs_col = r500_rs_col;
+ rX00_rs_col_write = r500_rs_col_write;
+ rX00_rs_tex = r500_rs_tex;
+ rX00_rs_tex_write = r500_rs_tex_write;
+ } else {
+ rX00_rs_col = r300_rs_col;
+ rX00_rs_col_write = r300_rs_col_write;
+ rX00_rs_tex = r300_rs_tex;
+ rX00_rs_tex_write = r300_rs_tex_write;
+ }
- /* Rasterize at least one color, or bad things happen. */
- if ((col_count == 0) && (tex_count == 0)) {
- rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ /* Rasterize colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ /* Always rasterize if it's written by the VS,
+ * otherwise it locks up. */
+ rX00_rs_col(rs, col_count, i, FALSE);
+
+ /* Write it to the FS input register if it's used by the FS. */
+ if (fs_inputs->color[i] != ATTR_UNUSED) {
+ rX00_rs_col_write(rs, col_count, fp_offset);
+ fp_offset++;
+ }
col_count++;
+ } else {
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->color[i] != ATTR_UNUSED) {
+ fp_offset++;
+ }
}
+ }
- for (i = 0; i < tex_count; i++) {
- rs->inst[i] |= R500_RS_INST_TEX_ID(i) |
- R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset);
- fp_offset++;
+ /* Rasterize texture coordinates. */
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ /* Always rasterize if it's written by the VS,
+ * otherwise it locks up. */
+ rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+
+ /* Write it to the FS input register if it's used by the FS. */
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ rX00_rs_tex_write(rs, tex_count, fp_offset);
+ fp_offset++;
+ }
+ tex_count++;
+ } else {
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ fp_offset++;
+ }
}
+ }
- for (i = 0; i < col_count; i++) {
- rs->inst[i] |= R500_RS_INST_COL_ID(i) |
- R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
+ /* Rasterize fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED) {
+ /* Always rasterize if it's written by the VS,
+ * otherwise it locks up. */
+ rX00_rs_tex(rs, tex_count, tex_count, TRUE);
+
+ /* Write it to the FS input register if it's used by the FS. */
+ if (fs_inputs->fog != ATTR_UNUSED) {
+ rX00_rs_tex_write(rs, tex_count, fp_offset);
fp_offset++;
}
+ tex_count++;
} else {
- for (i = 0; i < info->num_inputs; i++) {
- assert(tab[i] != -1);
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_COLOR:
- rs->ip[col_count] |=
- R300_RS_COL_PTR(col_count) |
- R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
- col_count++;
- break;
- case TGSI_SEMANTIC_GENERIC:
- rs->ip[tex_count] |=
- R300_RS_TEX_PTR(rs_tex_comp) |
- R300_RS_SEL_S(R300_RS_SEL_C0) |
- R300_RS_SEL_T(R300_RS_SEL_C1) |
- R300_RS_SEL_R(R300_RS_SEL_C2) |
- R300_RS_SEL_Q(R300_RS_SEL_C3);
- tex_count++;
- rs_tex_comp+=4;
- break;
- default:
- break;
- }
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->fog != ATTR_UNUSED) {
+ fp_offset++;
}
+ }
- if (col_count == 0) {
- rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
- }
+ /* Rasterize WPOS. */
+ /* If the FS doesn't need it, it's not written by the VS. */
+ if (fs_inputs->wpos != ATTR_UNUSED) {
+ rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+ rX00_rs_tex_write(rs, tex_count, fp_offset);
- if (tex_count == 0) {
- rs->ip[0] |=
- R300_RS_SEL_S(R300_RS_SEL_K0) |
- R300_RS_SEL_T(R300_RS_SEL_K0) |
- R300_RS_SEL_R(R300_RS_SEL_K0) |
- R300_RS_SEL_Q(R300_RS_SEL_K1);
- }
+ fp_offset++;
+ tex_count++;
+ }
- /* Rasterize at least one color, or bad things happen. */
- if ((col_count == 0) && (tex_count == 0)) {
- col_count++;
- }
+ /* Rasterize at least one color, or bad things happen. */
+ if (col_count == 0 && tex_count == 0) {
+ rX00_rs_col(rs, 0, 0, TRUE);
+ col_count++;
+ }
- for (i = 0; i < tex_count; i++) {
- rs->inst[i] |= R300_RS_INST_TEX_ID(i) |
- R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset);
- fp_offset++;
- }
+ rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
+ R300_HIRES_EN;
- for (i = 0; i < col_count; i++) {
- rs->inst[i] |= R300_RS_INST_COL_ID(i) |
- R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
- fp_offset++;
- }
+ rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0);
+}
+
+/* Update the vertex format. */
+static void r300_update_derived_shader_state(struct r300_context* r300)
+{
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+
+ /*
+ struct r300_shader_key* key;
+ struct r300_shader_derived_value* value;
+ key = CALLOC_STRUCT(r300_shader_key);
+ key->vs = r300->vs;
+ key->fs = r300->fs;
+
+ value = (struct r300_shader_derived_value*)
+ util_hash_table_get(r300->shader_hash_table, (void*)key);
+ if (value) {
+ //vformat = value->vformat;
+ rs_block = value->rs_block;
+
+ FREE(key);
+ } else {
+ rs_block = CALLOC_STRUCT(r300_rs_block);
+ value = CALLOC_STRUCT(r300_shader_derived_value);
+
+ r300_update_rs_block(r300, rs_block);
+
+ //value->vformat = vformat;
+ value->rs_block = rs_block;
+ util_hash_table_set(r300->shader_hash_table,
+ (void*)key, (void*)value);
+ } */
+
+ /* Reset structures */
+ memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
+ memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
+ memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4);
+
+ r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs);
+
+ if (r300screen->caps->has_tcl) {
+ r300_vertex_psc(r300);
+ } else {
+ r300_draw_emit_all_attribs(r300);
+ draw_compute_vertex_size(&r300->vertex_info->vinfo);
+ r300_swtcl_vertex_psc(r300);
}
- rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
- R300_HIRES_EN;
+ r300->dirty_state |= R300_NEW_RS_BLOCK;
+}
+
+static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
+{
+ /* We are interested only in the cases when a new depth or stencil value
+ * can be written and changed. */
+
+ /* We might optionally check for [Z func: never] and inspect the stencil
+ * state in a similar fashion, but it's not terribly important. */
+ return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
+ (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
+ ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
+ (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
+}
+
+static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
+{
+ /* We are interested only in the cases when alpha testing can kill
+ * a fragment. */
+ uint32_t af = dsa->alpha_function;
+
+ return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
+ (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
+}
- rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
+static void r300_update_ztop(struct r300_context* r300)
+{
+ r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
+
+ /* This is important enough that I felt it warranted a comment.
+ *
+ * According to the docs, these are the conditions where ZTOP must be
+ * disabled:
+ * 1) Alpha testing enabled
+ * 2) Texture kill instructions in fragment shader
+ * 3) Chroma key culling enabled
+ * 4) W-buffering enabled
+ *
+ * The docs claim that for the first three cases, if no ZS writes happen,
+ * then ZTOP can be used.
+ *
+ * (3) will never apply since we do not support chroma-keyed operations.
+ * (4) will need to be re-examined (and this comment updated) if/when
+ * Hyper-Z becomes supported.
+ *
+ * Additionally, the following conditions require disabled ZTOP:
+ * 5) Depth writes in fragment shader
+ * 6) Outstanding occlusion queries
+ *
+ * ~C.
+ */
+
+ /* ZS writes */
+ if (r300_dsa_writes_depth_stencil(r300->dsa_state) &&
+ (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */
+ r300->fs->info.uses_kill)) { /* (2) */
+ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
+ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300->query_current) { /* (6) */
+ r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+ }
}
void r300_update_derived_state(struct r300_context* r300)
{
if (r300->dirty_state &
- (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
- r300_update_vertex_format(r300);
+ (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER |
+ R300_NEW_VERTEX_FORMAT)) {
+ r300_update_derived_shader_state(r300);
}
- if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
- r300_update_fs_tab(r300);
- r300_update_rs_block(r300);
+ if (r300->dirty_state &
+ (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
+ r300_update_ztop(r300);
}
}
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
index 71a4a47b003..05ad535e2de 100644
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -25,6 +25,10 @@
struct r300_context;
+unsigned r300_shader_key_hash(void* key);
+
+int r300_shader_key_compare(void* key1, void* key2);
+
void r300_update_derived_state(struct r300_context* r300);
#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 88eb66b79e7..35be00e1b01 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -24,8 +24,12 @@
#ifndef R300_STATE_INLINES_H
#define R300_STATE_INLINES_H
+#include "draw/draw_vertex.h"
+
#include "pipe/p_format.h"
+#include "util/u_format.h"
+
#include "r300_reg.h"
/* Some maths. These should probably find their way to u_math, if needed. */
@@ -51,6 +55,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func)
return R300_COMB_FCN_MAX;
default:
debug_printf("r300: Unknown blend function %d\n", blend_func);
+ assert(0);
break;
}
return 0;
@@ -98,6 +103,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */
default:
debug_printf("r300: Unknown blend factor %d\n", blend_fact);
+ assert(0);
break;
}
return 0;
@@ -127,6 +133,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
default:
debug_printf("r300: Unknown depth/stencil function %d\n",
zs_func);
+ assert(0);
break;
}
return 0;
@@ -153,6 +160,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op)
return R300_ZS_INVERT;
default:
debug_printf("r300: Unknown stencil op %d", s_op);
+ assert(0);
break;
}
return 0;
@@ -179,11 +187,48 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
return R300_FG_ALPHA_FUNC_ALWAYS;
default:
debug_printf("r300: Unknown alpha function %d", alpha_func);
+ assert(0);
break;
}
return 0;
}
+static INLINE uint32_t
+r300_translate_polygon_mode_front(unsigned mode) {
+ switch (mode)
+ {
+ case PIPE_POLYGON_MODE_FILL:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+ case PIPE_POLYGON_MODE_LINE:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+ case PIPE_POLYGON_MODE_POINT:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+
+ default:
+ debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+ __FUNCTION__);
+ return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+ }
+}
+
+static INLINE uint32_t
+r300_translate_polygon_mode_back(unsigned mode) {
+ switch (mode)
+ {
+ case PIPE_POLYGON_MODE_FILL:
+ return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+ case PIPE_POLYGON_MODE_LINE:
+ return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+ case PIPE_POLYGON_MODE_POINT:
+ return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+
+ default:
+ debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+ __FUNCTION__);
+ return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+ }
+}
+
/* Texture sampler state. */
static INLINE uint32_t r300_translate_wrap(int wrap)
@@ -207,40 +252,42 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
default:
debug_printf("r300: Unknown texture wrap %d", wrap);
+ assert(0);
return 0;
}
}
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
+static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+ int is_anisotropic)
{
uint32_t retval = 0;
- switch (min) {
+ if (is_anisotropic)
+ retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO;
+ else {
+ switch (min) {
case PIPE_TEX_FILTER_NEAREST:
retval |= R300_TX_MIN_FILTER_NEAREST;
break;
case PIPE_TEX_FILTER_LINEAR:
retval |= R300_TX_MIN_FILTER_LINEAR;
break;
- case PIPE_TEX_FILTER_ANISO:
- retval |= R300_TX_MIN_FILTER_ANISO;
- break;
default:
debug_printf("r300: Unknown texture filter %d\n", min);
+ assert(0);
break;
- }
- switch (mag) {
+ }
+ switch (mag) {
case PIPE_TEX_FILTER_NEAREST:
retval |= R300_TX_MAG_FILTER_NEAREST;
break;
case PIPE_TEX_FILTER_LINEAR:
retval |= R300_TX_MAG_FILTER_LINEAR;
break;
- case PIPE_TEX_FILTER_ANISO:
- retval |= R300_TX_MAG_FILTER_ANISO;
- break;
default:
debug_printf("r300: Unknown texture filter %d\n", mag);
+ assert(0);
break;
+ }
}
switch (mip) {
case PIPE_TEX_MIPFILTER_NONE:
@@ -254,6 +301,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
break;
default:
debug_printf("r300: Unknown texture filter %d\n", mip);
+ assert(0);
break;
}
@@ -277,6 +325,8 @@ static INLINE uint32_t r300_anisotropy(float max_aniso)
/* Buffer formats. */
+/* Colorbuffer formats. This is the unswizzled format of the RB3D block's
+ * output. For the swizzling of the targets, check the shader's format. */
static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
{
switch (format) {
@@ -295,7 +345,6 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
- case PIPE_FORMAT_Z24S8_UNORM:
return R300_COLOR_FORMAT_ARGB8888;
/* XXX Not in pipe_format
case PIPE_FORMAT_A32R32G32B32:
@@ -312,17 +361,21 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
debug_printf("r300: Implementation error: "
"Got unsupported color format %s in %s\n",
pf_name(format), __FUNCTION__);
+ assert(0);
break;
}
return 0;
}
+/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */
static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
{
switch (format) {
/* 16-bit depth, no stencil */
case PIPE_FORMAT_Z16_UNORM:
return R300_DEPTHFORMAT_16BIT_INT_Z;
+ /* 24-bit depth, ignored stencil */
+ case PIPE_FORMAT_Z24X8_UNORM:
/* 24-bit depth, 8-bit stencil */
case PIPE_FORMAT_Z24S8_UNORM:
return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
@@ -330,18 +383,22 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
debug_printf("r300: Implementation error: "
"Got unsupported ZS format %s in %s\n",
pf_name(format), __FUNCTION__);
+ assert(0);
break;
}
return 0;
}
-/* Translate pipe_format into US_OUT_FMT.
+/* Shader output formats. This is essentially the swizzle from the shader
+ * to the RB3D block.
+ *
* Note that formats are stored from C3 to C0. */
static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ /* XXX */
case PIPE_FORMAT_Z24S8_UNORM:
return R300_US_OUT_FMT_C4_8 |
R300_C0_SEL_B | R300_C1_SEL_G |
@@ -355,6 +412,7 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format)
debug_printf("r300: Implementation error: "
"Got unsupported output format %s in %s\n",
pf_name(format), __FUNCTION__);
+ assert(0);
return R300_US_OUT_FMT_UNUSED;
}
return 0;
@@ -381,32 +439,119 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
return 0;
}
-static INLINE uint32_t translate_vertex_data_type(int type) {
- switch (type) {
- case EMIT_1F:
- case EMIT_1F_PSIZE:
- return R300_DATA_TYPE_FLOAT_1;
- break;
- case EMIT_2F:
- return R300_DATA_TYPE_FLOAT_2;
- break;
- case EMIT_3F:
- return R300_DATA_TYPE_FLOAT_3;
- break;
- case EMIT_4F:
- return R300_DATA_TYPE_FLOAT_4;
+/* Utility function to count the number of components in RGBAZS formats.
+ * XXX should go to util or p_format.h */
+static INLINE unsigned pf_component_count(enum pipe_format format) {
+ unsigned count = 0;
+
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+ count++;
+ }
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) {
+ count++;
+ }
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) {
+ count++;
+ }
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) {
+ count++;
+ }
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) {
+ count++;
+ }
+ if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
+ count++;
+ }
+
+ return count;
+}
+
+/* Translate pipe_formats into PSC vertex types. */
+static INLINE uint16_t
+r300_translate_vertex_data_type(enum pipe_format format) {
+ uint32_t result = 0;
+ const struct util_format_description *desc;
+ unsigned components = pf_component_count(format);
+
+ desc = util_format_description(format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
+ debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
+ __FUNCTION__, __LINE__);
+ assert(0);
+ }
+
+ switch (desc->channel[0].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+ case 32:
+ result = R300_DATA_TYPE_FLOAT_1 + (components - 1);
+ break;
+ default:
+ debug_printf("r300: Bad format %s in %s:%d\n",
+ pf_name(format), __FUNCTION__, __LINE__);
+ assert(0);
+ }
break;
- case EMIT_4UB:
- return R300_DATA_TYPE_BYTE;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+ case 8:
+ result = R300_DATA_TYPE_BYTE;
+ break;
+ case 16:
+ if (components > 2) {
+ result = R300_DATA_TYPE_SHORT_4;
+ } else {
+ result = R300_DATA_TYPE_SHORT_2;
+ }
+ break;
+ default:
+ debug_printf("r300: Bad format %s in %s:%d\n",
+ pf_name(format), __FUNCTION__, __LINE__);
+ debug_printf("r300: util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == %d\n",
+ util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0));
+ assert(0);
+ }
break;
default:
- debug_printf("r300: Implementation error: "
- "Bad vertex data type!\n");
+ debug_printf("r300: Bad format %s in %s:%d\n",
+ pf_name(format), __FUNCTION__, __LINE__);
assert(0);
- break;
}
- return 0;
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= R300_SIGNED;
+ }
+ if (desc->channel[0].normalized) {
+ result |= R300_NORMALIZE;
+ }
+
+ return result;
+}
+
+static INLINE uint16_t
+r300_translate_vertex_data_swizzle(enum pipe_format format) {
+ const struct util_format_description *desc = util_format_description(format);
+
+ assert(format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
+ debug_printf("r300: Bad format %s in %s:%d\n",
+ pf_name(format), __FUNCTION__, __LINE__);
+ return 0;
+ }
+
+ return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) |
+ (0xf << R300_WRITE_ENA_SHIFT));
}
#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 3865730d635..f25f3ca217d 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -21,9 +21,12 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
#include "r300_state_invariant.h"
-
struct pipe_viewport_state r300_viewport_identity = {
.scale = {1.0, 1.0, 1.0, 1.0},
.translate = {0.0, 0.0, 0.0, 0.0},
@@ -40,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- BEGIN_CS(24 + (caps->has_tcl ? 2: 0));
+ BEGIN_CS(16 + (caps->has_tcl ? 2: 0));
/*** Graphics Backend (GB) ***/
/* Various GB enables */
@@ -63,13 +66,8 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
- OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0);
- OUT_CS_REG(R300_US_W_FMT, 0x0);
/*** VAP ***/
- /* Max and min vertex index clamp. */
- OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff);
/* Sign/normalize control */
OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
/* TCL-only stuff */
@@ -81,15 +79,11 @@ void r300_emit_invariant_state(struct r300_context* r300)
END_CS;
/* XXX unsorted stuff from surface_fill */
- BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
- /* Flush PVS. */
- OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+ BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0));
- OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
- R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
- R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
- R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
if (caps->has_tcl) {
+ /*Flushing PVS is required before the VAP_GB registers can be changed*/
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CS_32F(1.0);
OUT_CS_32F(1.0);
@@ -111,7 +105,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
/* XXX this big chunk should be refactored into rs_state */
OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
- OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000);
OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
@@ -121,22 +114,15 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
- OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
- OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
if (caps->is_r500) {
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
- OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
}
- OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
- OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
- OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
- OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
/* XXX */
OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h
index 5bea6779fe5..05cff0d6dfe 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.h
+++ b/src/gallium/drivers/r300/r300_state_invariant.h
@@ -23,11 +23,7 @@
#ifndef R300_STATE_INVARIANT_H
#define R300_STATE_INVARIANT_H
-#include "r300_chipset.h"
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-#include "r300_state_inlines.h"
+struct r300_context;
void r300_emit_invariant_state(struct r300_context* r300);
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
deleted file mode 100644
index cc6288cb519..00000000000
--- a/src/gallium/drivers/r300/r300_surface.c
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_surface.h"
-
-static void r300_surface_setup(struct r300_context* r300,
- struct r300_texture* dest,
- unsigned x, unsigned y,
- unsigned w, unsigned h)
-{
- struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
- unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size;
- CS_LOCALS(r300);
-
- r300_emit_blend_state(r300, &blend_clear_state);
- r300_emit_blend_color_state(r300, &blend_color_clear_state);
- r300_emit_dsa_state(r300, &dsa_clear_state);
- r300_emit_rs_state(r300, &rs_clear_state);
-
- BEGIN_CS(26);
-
- /* Viewport setup */
- OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
- OUT_CS_32F((float)w);
- OUT_CS_32F((float)x);
- OUT_CS_32F((float)h);
- OUT_CS_32F((float)y);
- OUT_CS_32F(1.0);
- OUT_CS_32F(0.0);
-
- OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
- R300_VPORT_X_OFFSET_ENA |
- R300_VPORT_Y_SCALE_ENA |
- R300_VPORT_Y_OFFSET_ENA |
- R300_VTX_XY_FMT | R300_VTX_Z_FMT);
-
- /* Pixel scissors. */
- OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
- if (caps->is_r500) {
- OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT));
- OUT_CS(((w - 1) << R300_SCISSORS_X_SHIFT) | ((h - 1) << R300_SCISSORS_Y_SHIFT));
- } else {
- /* Non-R500 chipsets have an offset of 1440 in their scissors. */
- OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) |
- ((y + 1440) << R300_SCISSORS_Y_SHIFT));
- OUT_CS((((w - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
- (((h - 1) + 1440) << R300_SCISSORS_Y_SHIFT));
- }
-
- /* Flush colorbuffer and blend caches. */
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D |
- R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL);
- OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
- R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-
- /* Setup colorbuffer. */
- OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
- OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1);
- OUT_CS_RELOC(dest->buffer, pixpitch |
- r300_translate_colorformat(dest->tex.format), 0,
- RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf);
-
- END_CS;
-}
-
-/* Provides pipe_context's "surface_fill". Commonly used for clearing
- * buffers. */
-static void r300_surface_fill(struct pipe_context* pipe,
- struct pipe_surface* dest,
- unsigned x, unsigned y,
- unsigned w, unsigned h,
- unsigned color)
-{
- int i;
- float r, g, b, a, depth;
- struct r300_context* r300 = r300_context(pipe);
- struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
- struct r300_texture* tex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
- boolean invalid = FALSE;
- CS_LOCALS(r300);
-
- a = (float)((color >> 24) & 0xff) / 255.0f;
- r = (float)((color >> 16) & 0xff) / 255.0f;
- g = (float)((color >> 8) & 0xff) / 255.0f;
- b = (float)((color >> 0) & 0xff) / 255.0f;
- debug_printf("r300: Filling surface %p at (%d,%d),"
- " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
- dest, x, y, w, h, pixpitch, color);
-
- /* Fallback? */
- if (FALSE) {
-fallback:
- debug_printf("r300: Falling back on surface clear...");
- util_surface_fill(pipe, dest, x, y, w, h, color);
- return;
- }
-
- /* Make sure our target BO is okay. */
-validate:
- if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
- 0, RADEON_GEM_DOMAIN_VRAM)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- if (!r300->winsys->validate(r300->winsys)) {
- r300->context.flush(&r300->context, 0, NULL);
- if (invalid) {
- debug_printf("r300: Stuck in validation loop, gonna fallback.");
- goto fallback;
- }
- invalid = TRUE;
- goto validate;
- }
-
- r300_surface_setup(r300, tex, x, y, w, h);
-
- /* Vertex shader setup */
- if (caps->has_tcl) {
- r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
- } else {
- BEGIN_CS(4);
- OUT_CS_REG(R300_VAP_CNTL_STATUS,
-#ifdef PIPE_ARCH_BIG_ENDIAN
- R300_VC_32BIT_SWAP |
-#endif
- R300_VAP_TCL_BYPASS);
- OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
- R300_PVS_NUM_CNTLRS(5) |
- R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
- R300_PVS_VF_MAX_VTX_NUM(12));
- END_CS;
- }
-
- /* Fragment shader setup */
- if (caps->is_r500) {
- r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0);
- r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state);
- } else {
- r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0);
- r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state);
- }
-
- BEGIN_CS(26);
-
- /* VAP stream control, mapping from input memory to PVS/RS memory */
- if (caps->has_tcl) {
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
- (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
- ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
- R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
- } else {
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
- (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
- ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) |
- R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
- }
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
- (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
- (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
-
- /* VAP format controls */
- OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
- R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
- R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
- OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x0);
-
- /* Disable textures */
- OUT_CS_REG(R300_TX_ENABLE, 0x0);
-
- /* The size of the point we're about to draw, in sixths of pixels */
- OUT_CS_REG(R300_GA_POINT_SIZE,
- ((h * 6) & R300_POINTSIZE_Y_MASK) |
- ((w * 6) << R300_POINTSIZE_X_SHIFT));
-
- /* Vertex size. */
- OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
-
- /* Packet3 with our point vertex */
- OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8);
- OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
- (1 << R300_PRIM_NUM_VERTICES_SHIFT));
- /* Position */
- OUT_CS_32F(0.5);
- OUT_CS_32F(0.5);
- OUT_CS_32F(1.0);
- OUT_CS_32F(1.0);
- /* Color */
- OUT_CS_32F(r);
- OUT_CS_32F(g);
- OUT_CS_32F(b);
- OUT_CS_32F(a);
-
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-
- END_CS;
-
- r300->dirty_hw++;
-}
-
-static void r300_surface_copy(struct pipe_context* pipe,
- struct pipe_surface* dest,
- unsigned destx, unsigned desty,
- struct pipe_surface* src,
- unsigned srcx, unsigned srcy,
- unsigned w, unsigned h)
-{
- struct r300_context* r300 = r300_context(pipe);
- struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
- struct r300_texture* srctex = (struct r300_texture*)src->texture;
- struct r300_texture* desttex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size;
- boolean invalid = FALSE;
- float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty;
- CS_LOCALS(r300);
-
- debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
- " dimensions %dx%d (pixel pitch %d)\n",
- src, srcx, srcy, dest, destx, desty, w, h, pixpitch);
-
- if ((srctex->buffer == desttex->buffer) &&
- ((destx < srcx + w) || (srcx < destx + w)) &&
- ((desty < srcy + h) || (srcy < desty + h))) {
-fallback:
- debug_printf("r300: Falling back on surface_copy\n");
- util_surface_copy(pipe, FALSE, dest, destx, desty, src,
- srcx, srcy, w, h);
- }
-
- /* Add our target BOs to the list. */
-validate:
- if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer,
- 0, RADEON_GEM_DOMAIN_VRAM)) {
- r300->context.flush(&r300->context, 0, NULL);
- goto validate;
- }
- if (!r300->winsys->validate(r300->winsys)) {
- r300->context.flush(&r300->context, 0, NULL);
- if (invalid) {
- debug_printf("r300: Stuck in validation loop, gonna fallback.");
- goto fallback;
- }
- invalid = TRUE;
- goto validate;
- }
-
- r300_surface_setup(r300, desttex, destx, desty, w, h);
-
- /* Setup the texture. */
- r300_emit_texture(r300, &r300_sampler_copy_state, srctex, 0);
-
- /* Flush and enable. */
- r300_flush_textures(r300);
-
- /* Vertex shader setup */
- if (caps->has_tcl) {
- r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
- } else {
- BEGIN_CS(4);
- OUT_CS_REG(R300_VAP_CNTL_STATUS,
-#ifdef PIPE_ARCH_BIG_ENDIAN
- R300_VC_32BIT_SWAP |
-#endif
- R300_VAP_TCL_BYPASS);
- OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
- R300_PVS_NUM_CNTLRS(5) |
- R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
- R300_PVS_VF_MAX_VTX_NUM(12));
- END_CS;
- }
-
- /* Fragment shader setup */
- if (caps->is_r500) {
- r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0);
- r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state);
- } else {
- r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0);
- r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state);
- }
-
- BEGIN_CS(30);
- /* VAP stream control, mapping from input memory to PVS/RS memory */
- if (caps->has_tcl) {
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
- R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
- } else {
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- ((R300_LAST_VEC | (6 << R300_DST_VEC_LOC_SHIFT) |
- R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
- }
- OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
- (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
- (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
-
- /* VAP format controls */
- OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
- R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
- /* Two components of texture 0 */
- OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2);
-
- /* Vertex size. */
- OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4);
-
- /* Packet3 with our texcoords */
- OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16);
- OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING |
- (4 << R300_PRIM_NUM_VERTICES_SHIFT));
- /* (x , y ) */
- OUT_CS_32F(fdestx / dest->width);
- OUT_CS_32F(fdesty / dest->height);
- OUT_CS_32F(fsrcx / src->width);
- OUT_CS_32F(fsrcy / src->height);
- /* (x , y + h) */
- OUT_CS_32F(fdestx / dest->width);
- OUT_CS_32F((fdesty + h) / dest->height);
- OUT_CS_32F(fsrcx / src->width);
- OUT_CS_32F((fsrcy + h) / src->height);
- /* (x + w, y + h) */
- OUT_CS_32F((fdestx + w) / dest->width);
- OUT_CS_32F((fdesty + h) / dest->height);
- OUT_CS_32F((fsrcx + w) / src->width);
- OUT_CS_32F((fsrcy + h) / src->height);
- /* (x + w, y ) */
- OUT_CS_32F((fdestx + w) / dest->width);
- OUT_CS_32F(fdesty / dest->height);
- OUT_CS_32F((fsrcx + w) / src->width);
- OUT_CS_32F(fsrcy / src->height);
-
- OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-
- END_CS;
-
- r300->dirty_hw++;
-}
-
-void r300_init_surface_functions(struct r300_context* r300)
-{
- r300->context.surface_fill = r300_surface_fill;
- r300->context.surface_copy = r300_surface_copy;
-}
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
deleted file mode 100644
index f9e98b2ec9c..00000000000
--- a/src/gallium/drivers/r300/r300_surface.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_SURFACE_H
-#define R300_SURFACE_H
-
-#include "pipe/p_context.h"
-#include "pipe/p_screen.h"
-
-#include "util/u_rect.h"
-
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_emit.h"
-#include "r300_fs.h"
-#include "r300_vs.h"
-#include "r300_state_inlines.h"
-
-static struct r300_blend_state blend_clear_state = {
- .blend_control = 0x0,
- .alpha_blend_control = 0x0,
- .rop = 0x0,
- .dither = 0x0,
-};
-
-static struct r300_blend_color_state blend_color_clear_state = {
- .blend_color = 0x0,
- .blend_color_red_alpha = 0x0,
- .blend_color_green_blue = 0x0,
-};
-
-static struct r300_dsa_state dsa_clear_state = {
- .alpha_function = 0x0,
- .alpha_reference = 0x0,
- .z_buffer_control = 0x0,
- .z_stencil_control = 0x0,
- .stencil_ref_mask = R300_STENCILWRITEMASK_MASK,
- .z_buffer_top = R300_ZTOP_ENABLE,
- .stencil_ref_bf = 0x0,
-};
-
-static struct r300_rs_state rs_clear_state = {
- .point_minmax = 0x36000006,
- .line_control = 0x00030006,
- .depth_scale_front = 0x0,
- .depth_offset_front = 0x0,
- .depth_scale_back = 0x0,
- .depth_offset_back = 0x0,
- .polygon_offset_enable = 0x0,
- .cull_mode = 0x0,
- .line_stipple_config = 0x3BAAAAAB,
- .line_stipple_value = 0x0,
- .color_control = R300_SHADE_MODEL_FLAT,
-};
-
-static struct r300_rs_block r3xx_rs_block_clear_state = {
- .ip[0] = R500_RS_SEL_S(R300_RS_SEL_C0) |
- R500_RS_SEL_T(R300_RS_SEL_C0) |
- R500_RS_SEL_R(R300_RS_SEL_C0) |
- R500_RS_SEL_Q(R300_RS_SEL_K1),
- .inst[0] = R300_RS_INST_COL_CN_WRITE,
- .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN,
- .inst_count = 0,
-};
-
-static struct r300_rs_block r5xx_rs_block_clear_state = {
- .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_Q(R500_RS_IP_PTR_K1),
- .inst[0] = R500_RS_INST_COL_CN_WRITE,
- .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN,
- .inst_count = 0,
-};
-
-/* The following state is used for surface_copy only. */
-
-static struct r300_rs_block r3xx_rs_block_copy_state = {
- .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) |
- R500_RS_SEL_T(R300_RS_SEL_K0) |
- R500_RS_SEL_R(R300_RS_SEL_K0) |
- R500_RS_SEL_Q(R300_RS_SEL_K1),
- .inst[0] = R300_RS_INST_COL_CN_WRITE,
- .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
- .inst_count = R300_RS_TX_OFFSET(0),
-};
-
-static struct r300_rs_block r5xx_rs_block_copy_state = {
- .ip[0] = R500_RS_SEL_S(0) |
- R500_RS_SEL_T(1) |
- R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
- R500_RS_SEL_Q(R500_RS_IP_PTR_K1),
- .inst[0] = R500_RS_INST_TEX_CN_WRITE,
- .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
- .inst_count = R300_RS_TX_OFFSET(0),
-};
-
-static struct r300_sampler_state r300_sampler_copy_state = {
- .filter0 = R300_TX_WRAP_S(R300_TX_CLAMP) |
- R300_TX_WRAP_T(R300_TX_CLAMP) |
- R300_TX_MAG_FILTER_NEAREST |
- R300_TX_MIN_FILTER_NEAREST,
-};
-
-#endif /* R300_SURFACE_H */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index ce60ded7caf..9a96206a4dc 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -20,41 +20,75 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "pipe/p_screen.h"
+
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "r300_context.h"
#include "r300_texture.h"
+#include "r300_screen.h"
-static void r300_setup_texture_state(struct r300_texture* tex)
+static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
{
struct r300_texture_state* state = &tex->state;
struct pipe_texture *pt = &tex->tex;
- state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
- R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) |
- R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
- R300_TX_NUM_LEVELS(pt->last_level) |
- R300_TX_PITCH_EN;
+ state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
+ R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
+
+ if (tex->is_npot) {
+ /* rectangles love this */
+ state->format0 |= R300_TX_PITCH_EN;
+ state->format2 = (tex->pitch[0] - 1) & 0x1fff;
+ } else {
+ /* power of two textures (3D, mipmaps, and no pitch) */
+ state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
+ }
- /* XXX */
state->format1 = r300_translate_texformat(pt->format);
if (pt->target == PIPE_TEXTURE_CUBE) {
- state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+ state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
}
if (pt->target == PIPE_TEXTURE_3D) {
- state->format1 |= R300_TX_FORMAT_3D;
+ state->format1 |= R300_TX_FORMAT_3D;
}
- state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1;
-
- /* Assume (somewhat foolishly) that oversized textures will
- * not be permitted by the state tracker. */
- if (pt->width[0] > 2048) {
- state->format2 |= R500_TXWIDTH_BIT11;
- }
- if (pt->height[0] > 2048) {
- state->format2 |= R500_TXHEIGHT_BIT11;
+ /* large textures on r500 */
+ if (is_r500)
+ {
+ if (pt->width0 > 2048) {
+ state->format2 |= R500_TXWIDTH_BIT11;
+ }
+ if (pt->height0 > 2048) {
+ state->format2 |= R500_TXHEIGHT_BIT11;
+ }
}
+ assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048));
debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
- pt->width[0], pt->height[0], pt->last_level);
+ pt->width0, pt->height0, pt->last_level);
+}
+
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+ unsigned zslice, unsigned face)
+{
+ unsigned offset = tex->offset[level];
+
+ switch (tex->tex.target) {
+ case PIPE_TEXTURE_3D:
+ assert(face == 0);
+ return offset + zslice * tex->layer_size[level];
+
+ case PIPE_TEXTURE_CUBE:
+ assert(zslice == 0);
+ return offset + face * tex->layer_size[level];
+
+ default:
+ assert(zslice == 0 && face == 0);
+ return offset;
+ }
}
/**
@@ -67,47 +101,49 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
return tex->stride_override;
if (level > tex->tex.last_level) {
- debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level);
+ debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__,
+ level, tex->tex.last_level);
return 0;
}
- return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32);
+ return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
}
static void r300_setup_miptree(struct r300_texture* tex)
{
struct pipe_texture* base = &tex->tex;
- int stride, size;
+ int stride, size, layer_size;
int i;
for (i = 0; i <= base->last_level; i++) {
- if (i > 0) {
- base->width[i] = minify(base->width[i-1]);
- base->height[i] = minify(base->height[i-1]);
- base->depth[i] = minify(base->depth[i-1]);
- }
+ unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i));
- base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
- base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
-
- /* Radeons enjoy things in multiples of 64.
- *
- * XXX
- * POT, uncompressed, unmippmapped textures can be aligned to 32,
- * instead of 64. */
stride = r300_texture_get_stride(tex, i);
- size = stride * base->nblocksy[i] * base->depth[i];
+ layer_size = stride * nblocksy;
+
+ if (base->target == PIPE_TEXTURE_CUBE)
+ size = layer_size * 6;
+ else
+ size = layer_size * u_minify(base->depth0, i);
tex->offset[i] = align(tex->size, 32);
tex->size = tex->offset[i] + size;
+ tex->layer_size[i] = layer_size;
+ tex->pitch[i] = stride / util_format_get_blocksize(base->format);
debug_printf("r300: Texture miptree: Level %d "
"(%dx%dx%d px, pitch %d bytes)\n",
- i, base->width[i], base->height[i], base->depth[i],
- stride);
+ i, u_minify(base->width0, i), u_minify(base->height0, i),
+ u_minify(base->depth0, i), stride);
}
}
+static void r300_setup_flags(struct r300_texture* tex)
+{
+ tex->is_npot = !util_is_power_of_two(tex->tex.width0) ||
+ !util_is_power_of_two(tex->tex.height0);
+}
+
/* Create a new texture. */
static struct pipe_texture*
r300_texture_create(struct pipe_screen* screen,
@@ -123,9 +159,9 @@ static struct pipe_texture*
pipe_reference_init(&tex->tex.reference, 1);
tex->tex.screen = screen;
+ r300_setup_flags(tex);
r300_setup_miptree(tex);
-
- r300_setup_texture_state(tex);
+ r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
tex->buffer = screen->buffer_create(screen, 1024,
PIPE_BUFFER_USAGE_PIXEL,
@@ -159,17 +195,20 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface);
unsigned offset;
- /* XXX this is certainly dependent on tex target */
- offset = tex->offset[level];
+ offset = r300_texture_get_offset(tex, level, zslice, face);
if (surface) {
pipe_reference_init(&surface->reference, 1);
pipe_texture_reference(&surface->texture, texture);
surface->format = texture->format;
- surface->width = texture->width[level];
- surface->height = texture->height[level];
+ surface->width = u_minify(texture->width0, level);
+ surface->height = u_minify(texture->height0, level);
surface->offset = offset;
surface->usage = flags;
+ surface->zslice = zslice;
+ surface->texture = texture;
+ surface->face = face;
+ surface->level = level;
}
return surface;
@@ -189,10 +228,10 @@ static struct pipe_texture*
{
struct r300_texture* tex;
- /* XXX we should start doing mips now... */
+ /* Support only 2D textures without mipmaps */
if (base->target != PIPE_TEXTURE_2D ||
- base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1 ||
+ base->last_level != 0) {
return NULL;
}
@@ -206,15 +245,64 @@ static struct pipe_texture*
tex->tex.screen = screen;
tex->stride_override = *stride;
+ tex->pitch[0] = *stride / util_format_get_blocksize(base->format);
- /* XXX */
- r300_setup_texture_state(tex);
+ r300_setup_flags(tex);
+ r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
pipe_buffer_reference(&tex->buffer, buffer);
return (struct pipe_texture*)tex;
}
+static struct pipe_video_surface *
+r300_video_surface_create(struct pipe_screen *screen,
+ enum pipe_video_chroma_format chroma_format,
+ unsigned width, unsigned height)
+{
+ struct r300_video_surface *r300_vsfc;
+ struct pipe_texture template;
+
+ assert(screen);
+ assert(width && height);
+
+ r300_vsfc = CALLOC_STRUCT(r300_video_surface);
+ if (!r300_vsfc)
+ return NULL;
+
+ pipe_reference_init(&r300_vsfc->base.reference, 1);
+ r300_vsfc->base.screen = screen;
+ r300_vsfc->base.chroma_format = chroma_format;
+ r300_vsfc->base.width = width;
+ r300_vsfc->base.height = height;
+
+ memset(&template, 0, sizeof(struct pipe_texture));
+ template.target = PIPE_TEXTURE_2D;
+ template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ template.last_level = 0;
+ template.width0 = util_next_power_of_two(width);
+ template.height0 = util_next_power_of_two(height);
+ template.depth0 = 1;
+ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
+ PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+ r300_vsfc->tex = screen->texture_create(screen, &template);
+ if (!r300_vsfc->tex)
+ {
+ FREE(r300_vsfc);
+ return NULL;
+ }
+
+ return &r300_vsfc->base;
+}
+
+static void r300_video_surface_destroy(struct pipe_video_surface *vsfc)
+{
+ struct r300_video_surface *r300_vsfc = r300_video_surface(vsfc);
+ pipe_texture_reference(&r300_vsfc->tex, NULL);
+ FREE(r300_vsfc);
+}
+
void r300_init_screen_texture_functions(struct pipe_screen* screen)
{
screen->texture_create = r300_texture_create;
@@ -222,6 +310,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen)
screen->get_tex_surface = r300_get_tex_surface;
screen->tex_surface_destroy = r300_tex_surface_destroy;
screen->texture_blanket = r300_texture_blanket;
+
+ screen->video_surface_create = r300_video_surface_create;
+ screen->video_surface_destroy= r300_video_surface_destroy;
}
boolean r300_get_texture_buffer(struct pipe_texture* texture,
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index bd87790bc34..55ceb1a5136 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -23,11 +23,8 @@
#ifndef R300_TEXTURE_H
#define R300_TEXTURE_H
-#include "pipe/p_screen.h"
+#include "pipe/p_video_state.h"
-#include "util/u_math.h"
-
-#include "r300_context.h"
#include "r300_reg.h"
struct r300_texture;
@@ -36,6 +33,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen);
unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level);
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+ unsigned zslice, unsigned face);
+
/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */
static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
{
@@ -43,6 +43,19 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
/* X8 */
case PIPE_FORMAT_I8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+ case PIPE_FORMAT_L8_UNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
+ /* X16 */
+ case PIPE_FORMAT_R16_UNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+ case PIPE_FORMAT_R16_SNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X16) |
+ R300_TX_FORMAT_SIGNED;
+ case PIPE_FORMAT_Z16_UNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+ /* Y8X8 */
+ case PIPE_FORMAT_A8L8_UNORM:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
/* W8Z8Y8X8 */
case PIPE_FORMAT_A8R8G8B8_UNORM:
return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
@@ -75,12 +88,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
R300_TX_FORMAT_YUV_TO_RGB;
/* W24_FP */
case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
- /* Z5_Y6_X5 */
- case PIPE_FORMAT_R16_SNORM:
- return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5);
- case PIPE_FORMAT_Z16_UNORM:
- return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+
default:
debug_printf("r300: Implementation error: "
"Got unsupported texture format %s in %s\n",
@@ -91,6 +101,18 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
return 0;
}
+struct r300_video_surface
+{
+ struct pipe_video_surface base;
+ struct pipe_texture *tex;
+};
+
+static INLINE struct r300_video_surface *
+r300_video_surface(struct pipe_video_surface *pvs)
+{
+ return (struct r300_video_surface *)pvs;
+}
+
#ifndef R300_WINSYS_H
boolean r300_get_texture_buffer(struct pipe_texture* texture,
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 74d4fb50876..a792c2cf989 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
/* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */
/* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
- /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */
+ /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
/* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
/* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
/* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
@@ -135,10 +135,6 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */
/* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
/* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
- /* case TGSI_OPCODE_NOISE1: return RC_OPCODE_NOISE1; */
- /* case TGSI_OPCODE_NOISE2: return RC_OPCODE_NOISE2; */
- /* case TGSI_OPCODE_NOISE3: return RC_OPCODE_NOISE3; */
- /* case TGSI_OPCODE_NOISE4: return RC_OPCODE_NOISE4; */
case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
/* gap */
/* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
@@ -146,7 +142,6 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */
/* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
case TGSI_OPCODE_KIL: return RC_OPCODE_KIL;
- case TGSI_OPCODE_SWZ: return RC_OPCODE_SWZ;
}
fprintf(stderr, "Unknown opcode: %i\n", opcode);
@@ -195,10 +190,10 @@ static void transform_dstreg(
struct rc_dst_register * dst,
struct tgsi_full_dst_register * src)
{
- dst->File = translate_register_file(src->DstRegister.File);
- dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index);
- dst->WriteMask = src->DstRegister.WriteMask;
- dst->RelAddr = src->DstRegister.Indirect;
+ dst->File = translate_register_file(src->Register.File);
+ dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+ dst->WriteMask = src->Register.WriteMask;
+ dst->RelAddr = src->Register.Indirect;
}
static void transform_srcreg(
@@ -206,23 +201,19 @@ static void transform_srcreg(
struct rc_src_register * dst,
struct tgsi_full_src_register * src)
{
- dst->File = translate_register_file(src->SrcRegister.File);
- dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index);
- dst->RelAddr = src->SrcRegister.Indirect;
- dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0);
- dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3;
- dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6;
- dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9;
- dst->Abs = src->SrcRegisterExtMod.Absolute;
- dst->Negate =
- src->SrcRegisterExtSwz.NegateX |
- (src->SrcRegisterExtSwz.NegateY << 1) |
- (src->SrcRegisterExtSwz.NegateZ << 2) |
- (src->SrcRegisterExtSwz.NegateW << 3);
- dst->Negate ^= src->SrcRegister.Negate ? RC_MASK_XYZW : 0;
+ dst->File = translate_register_file(src->Register.File);
+ dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+ dst->RelAddr = src->Register.Indirect;
+ dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0);
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3;
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
+ dst->Abs = src->Register.Absolute;
+ dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;
}
-static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src)
+static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src,
+ uint32_t *shadowSamplers)
{
switch(src.Texture) {
case TGSI_TEXTURE_1D:
@@ -243,14 +234,17 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
case TGSI_TEXTURE_SHADOW1D:
dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
break;
case TGSI_TEXTURE_SHADOW2D:
dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
break;
case TGSI_TEXTURE_SHADOWRECT:
dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
break;
}
}
@@ -268,17 +262,19 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate);
if (src->Instruction.NumDstRegs)
- transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]);
+ transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]);
for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
- if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER)
- dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index;
+ if (src->Src[i].Register.File == TGSI_FILE_SAMPLER)
+ dst->U.I.TexSrcUnit = src->Src[i].Register.Index;
else
- transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]);
+ transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]);
}
/* Texturing. */
- transform_texture(dst, src->InstructionExtTexture);
+ if (src->Instruction.Texture)
+ transform_texture(dst, src->Texture,
+ &ttr->compiler->Program.ShadowSamplers);
}
static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 8460cfaf518..68aef70872e 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -1,5 +1,6 @@
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,91 +22,296 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_vs.h"
+#include "r300_fs.h"
#include "r300_context.h"
+#include "r300_screen.h"
#include "r300_tgsi_to_rc.h"
+#include "r300_reg.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "radeon_compiler.h"
+#include "util/u_math.h"
-static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
+/* Convert info about VS output semantics into r300_shader_semantics. */
+static void r300_shader_read_vs_outputs(
+ struct tgsi_shader_info* info,
+ struct r300_shader_semantics* vs_outputs)
{
- struct r300_vertex_shader * vs = c->UserData;
- struct tgsi_shader_info* info = &vs->info;
- struct tgsi_parse_context parser;
- struct tgsi_full_declaration * decl;
- boolean pointsize = false;
- int out_colors = 0;
- int colors = 0;
- int out_generic = 0;
- int generic = 0;
int i;
+ unsigned index;
- /* Fill in the input mapping */
- for (i = 0; i < info->num_inputs; i++)
- c->code->inputs[i] = i;
+ r300_shader_semantics_reset(vs_outputs);
- /* Fill in the output mapping */
for (i = 0; i < info->num_outputs; i++) {
+ index = info->output_semantic_index[i];
+
switch (info->output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ assert(index == 0);
+ vs_outputs->pos = i;
+ break;
+
case TGSI_SEMANTIC_PSIZE:
- pointsize = true;
+ assert(index == 0);
+ vs_outputs->psize = i;
break;
+
case TGSI_SEMANTIC_COLOR:
- out_colors++;
+ assert(index <= ATTR_COLOR_COUNT);
+ vs_outputs->color[index] = i;
break;
- case TGSI_SEMANTIC_FOG:
+
+ case TGSI_SEMANTIC_BCOLOR:
+ assert(index <= ATTR_COLOR_COUNT);
+ vs_outputs->bcolor[index] = i;
+ break;
+
case TGSI_SEMANTIC_GENERIC:
- out_generic++;
+ assert(index <= ATTR_GENERIC_COUNT);
+ vs_outputs->generic[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ assert(index == 0);
+ vs_outputs->fog = i;
break;
+
+ case TGSI_SEMANTIC_EDGEFLAG:
+ assert(index == 0);
+ fprintf(stderr, "r300 VP: cannot handle edgeflag output\n");
+ assert(0);
+ break;
+ default:
+ assert(0);
}
}
+}
- tgsi_parse_init(&parser, vs->state.tokens);
+static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
+{
+ struct r300_shader_semantics* vs_outputs = &vs->outputs;
+ uint32_t* hwfmt = vs->hwfmt;
+ int i, gen_count;
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
+
+ /* Do the actual vertex_info setup.
+ *
+ * vertex_info has four uints of hardware-specific data in it.
+ * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL
+ * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM
+ * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
+ * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
+
+ hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
+
+ /* Position. */
+ if (vs_outputs->pos != ATTR_UNUSED) {
+ hwfmt[1] |= R300_INPUT_CNTL_POS;
+ hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+ } else {
+ assert(0);
+ }
- while (!tgsi_parse_end_of_tokens(&parser)) {
- tgsi_parse_token(&parser);
+ /* Point size. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ }
- if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
- continue;
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+ hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
+ }
+ }
- decl = &parser.FullToken.FullDeclaration;
+ /* Back-face colors. */
+ if (any_bcolor_used) {
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+ hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+ }
+ }
- if (decl->Declaration.File != TGSI_FILE_OUTPUT)
- continue;
+ /* Texture coordinates. */
+ gen_count = 0;
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+ hwfmt[3] |= (4 << (3 * gen_count));
+ gen_count++;
+ }
+ }
- switch (decl->Semantic.SemanticName) {
- case TGSI_SEMANTIC_POSITION:
- c->code->outputs[decl->DeclarationRange.First] = 0;
- break;
- case TGSI_SEMANTIC_PSIZE:
- c->code->outputs[decl->DeclarationRange.First] = 1;
- break;
- case TGSI_SEMANTIC_COLOR:
- c->code->outputs[decl->DeclarationRange.First] = 1 +
- (pointsize ? 1 : 0) +
- colors++;
- break;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- c->code->outputs[decl->DeclarationRange.First] = 1 +
- (pointsize ? 1 : 0) +
- out_colors +
- generic++;
- break;
- default:
- debug_printf("r300: vs: Bad semantic declaration %d\n",
- decl->Semantic.SemanticName);
- break;
+ /* Fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED) {
+ hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+ hwfmt[3] |= (4 << (3 * gen_count));
+ gen_count++;
+ }
+
+ /* XXX magic */
+ assert(gen_count <= 8);
+
+ /* WPOS. */
+ vs->wpos_tex_output = gen_count;
+}
+
+/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed
+ * or isn't present. */
+static void r300_stream_locations_notcl(
+ struct r300_shader_semantics* vs_outputs,
+ int* stream_loc)
+{
+ int i, tabi = 0, gen_count;
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
+
+ /* Position. */
+ stream_loc[tabi++] = 0;
+
+ /* Point size. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ stream_loc[tabi++] = 1;
+ }
+
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
+ stream_loc[tabi++] = 2 + i;
+ }
+ }
+
+ /* Back-face colors. */
+ if (any_bcolor_used) {
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ stream_loc[tabi++] = 4 + i;
}
}
- tgsi_parse_free(&parser);
+ /* Texture coordinates. */
+ gen_count = 0;
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ assert(tabi < 16);
+ stream_loc[tabi++] = 6 + gen_count;
+ gen_count++;
+ }
+ }
+
+ /* Fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED) {
+ assert(tabi < 16);
+ stream_loc[tabi++] = 6 + gen_count;
+ gen_count++;
+ }
+
+ /* WPOS. */
+ if (vs_outputs->wpos != ATTR_UNUSED) {
+ assert(tabi < 16);
+ stream_loc[tabi++] = 6 + gen_count;
+ gen_count++;
+ }
+
+ for (; tabi < 16;) {
+ stream_loc[tabi++] = -1;
+ }
}
+static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
+{
+ struct r300_vertex_shader * vs = c->UserData;
+ struct r300_shader_semantics* outputs = &vs->outputs;
+ struct tgsi_shader_info* info = &vs->info;
+ int i, reg = 0;
+ boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED ||
+ outputs->bcolor[1] != ATTR_UNUSED;
+
+ /* Fill in the input mapping */
+ for (i = 0; i < info->num_inputs; i++)
+ c->code->inputs[i] = i;
+
+ /* Position. */
+ if (outputs->pos != ATTR_UNUSED) {
+ c->code->outputs[outputs->pos] = reg++;
+ } else {
+ assert(0);
+ }
+
+ /* Point size. */
+ if (outputs->psize != ATTR_UNUSED) {
+ c->code->outputs[outputs->psize] = reg++;
+ }
+
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (outputs->color[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->color[i]] = reg++;
+ } else if (any_bcolor_used) {
+ reg++;
+ }
+ }
+
+ /* Back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (outputs->bcolor[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->bcolor[i]] = reg++;
+ } else if (any_bcolor_used) {
+ reg++;
+ }
+ }
+
+ /* Texture coordinates. */
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (outputs->generic[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->generic[i]] = reg++;
+ }
+ }
+
+ /* Fog coordinates. */
+ if (outputs->fog != ATTR_UNUSED) {
+ c->code->outputs[outputs->fog] = reg++;
+ }
+
+ /* WPOS. */
+ if (outputs->wpos != ATTR_UNUSED) {
+ c->code->outputs[outputs->wpos] = reg++;
+ }
+}
+
+static void r300_insert_wpos(struct r300_vertex_program_compiler* c,
+ struct r300_shader_semantics* outputs)
+{
+ int i, lastOutput = 0;
+
+ /* Find the max output index. */
+ lastOutput = MAX2(lastOutput, outputs->psize);
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ lastOutput = MAX2(lastOutput, outputs->color[i]);
+ lastOutput = MAX2(lastOutput, outputs->bcolor[i]);
+ }
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ lastOutput = MAX2(lastOutput, outputs->generic[i]);
+ }
+ lastOutput = MAX2(lastOutput, outputs->fog);
+
+ /* Set WPOS after the last output. */
+ lastOutput++;
+ rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */
+ outputs->wpos = lastOutput;
+}
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs)
@@ -113,6 +319,9 @@ void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_program_compiler compiler;
struct tgsi_to_rc ttr;
+ /* Initialize. */
+ r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
+
/* Setup the compiler */
rc_init(&compiler.Base);
@@ -131,13 +340,19 @@ void r300_translate_vertex_shader(struct r300_context* r300,
r300_tgsi_to_rc(&ttr, vs->state.tokens);
- compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs);
+ compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1));
compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
+ /* Insert the WPOS output. */
+ r300_insert_wpos(&compiler, &vs->outputs);
+
+ r300_shader_vap_output_fmt(vs);
+ r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
+
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
- /* Todo: Fail gracefully */
+ /* XXX We should fallback using Draw. */
fprintf(stderr, "r300 VP: Compiler error\n");
abort();
}
@@ -147,88 +362,29 @@ void r300_translate_vertex_shader(struct r300_context* r300,
vs->translated = TRUE;
}
-
-/* XXX get these to r300_reg */
-#define R300_PVS_DST_OPCODE(x) ((x) << 0)
-# define R300_VE_DOT_PRODUCT 1
-# define R300_VE_MULTIPLY 2
-# define R300_VE_ADD 3
-# define R300_VE_MAXIMUM 7
-# define R300_VE_SET_LESS_THAN 10
-#define R300_PVS_DST_MATH_INST (1 << 6)
-# define R300_ME_RECIP_DX 6
-#define R300_PVS_DST_MACRO_INST (1 << 7)
-# define R300_PVS_MACRO_OP_2CLK_MADD 0
-#define R300_PVS_DST_REG_TYPE(x) ((x) << 8)
-# define R300_PVS_DST_REG_TEMPORARY 0
-# define R300_PVS_DST_REG_A0 1
-# define R300_PVS_DST_REG_OUT 2
-# define R300_PVS_DST_REG_OUT_REPL_X 3
-# define R300_PVS_DST_REG_ALT_TEMPORARY 4
-# define R300_PVS_DST_REG_INPUT 5
-#define R300_PVS_DST_OFFSET(x) ((x) << 13)
-#define R300_PVS_DST_WE(x) ((x) << 20)
-#define R300_PVS_DST_WE_XYZW (0xf << 20)
-
-#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0)
-# define R300_PVS_SRC_REG_TEMPORARY 0
-# define R300_PVS_SRC_REG_INPUT 1
-# define R300_PVS_SRC_REG_CONSTANT 2
-# define R300_PVS_SRC_REG_ALT_TEMPORARY 3
-#define R300_PVS_SRC_OFFSET(x) ((x) << 5)
-#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13)
-# define R300_PVS_SRC_SELECT_X 0
-# define R300_PVS_SRC_SELECT_Y 1
-# define R300_PVS_SRC_SELECT_Z 2
-# define R300_PVS_SRC_SELECT_W 3
-# define R300_PVS_SRC_SELECT_FORCE_0 4
-# define R300_PVS_SRC_SELECT_FORCE_1 5
-# define R300_PVS_SRC_SWIZZLE_XYZW \
- ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \
- (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13)
-# define R300_PVS_SRC_SWIZZLE_ZERO \
- ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \
- (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \
- (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13)
-# define R300_PVS_SRC_SWIZZLE_ONE \
- ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \
- (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \
- (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13)
-#define R300_PVS_MODIFIER_X (1 << 25)
-#define R300_PVS_MODIFIER_Y (1 << 26)
-#define R300_PVS_MODIFIER_Z (1 << 27)
-#define R300_PVS_MODIFIER_W (1 << 28)
-#define R300_PVS_NEGATE_XYZW \
- (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \
- R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W)
-
-struct r300_vertex_program_code r300_passthrough_vertex_shader = {
- .length = 8, /* two instructions */
-
- /* MOV out[0], in[0] */
- .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW,
- .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW,
- .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO,
- .body.d[3] = 0x0,
-
- /* MOV out[1], in[1] */
- .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
- .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
- .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO,
- .body.d[7] = 0x0,
-
- .inputs[0] = 0,
- .inputs[1] = 1,
- .outputs[0] = 0,
- .outputs[1] = 1,
-
- .InputsRead = 3,
- .OutputsWritten = 3
-};
-
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300)
+{
+ struct r300_vertex_shader* vs = r300->vs;
+ int tex_output = r300->vs->wpos_tex_output;
+ uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output;
+ uint32_t* hwfmt = vs->hwfmt;
+
+ if (r300->fs->inputs.wpos != ATTR_UNUSED) {
+ /* Enable WPOS in VAP. */
+ if (!(hwfmt[1] & tex_fmt)) {
+ hwfmt[1] |= tex_fmt;
+ hwfmt[3] |= (4 << (3 * tex_output));
+
+ assert(tex_output < 8);
+ return TRUE;
+ }
+ } else {
+ /* Disable WPOS in VAP. */
+ if (hwfmt[1] & tex_fmt) {
+ hwfmt[1] &= ~tex_fmt;
+ hwfmt[3] &= ~(4 << (3 * tex_output));
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 2a4ce315e32..18cfeee3cd4 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -1,5 +1,6 @@
/*
* Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,18 +26,25 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
-
#include "radeon_code.h"
+#include "r300_shader_semantics.h"
+
struct r300_context;
struct r300_vertex_shader {
/* Parent class */
struct pipe_shader_state state;
+
struct tgsi_shader_info info;
+ struct r300_shader_semantics outputs;
+ uint hwfmt[4];
+
+ /* Stream locations for SWTCL or if TCL is bypassed. */
+ int stream_loc_notcl[16];
- /* Fallback shader, because Draw has issues */
- struct draw_vertex_shader* draw;
+ /* Output stream location for WPOS. */
+ int wpos_tex_output;
/* Has this shader been translated yet? */
boolean translated;
@@ -45,10 +53,10 @@ struct r300_vertex_shader {
struct r300_vertex_program_code code;
};
-
-extern struct r300_vertex_program_code r300_passthrough_vertex_shader;
-
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs);
+/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300);
+
#endif /* R300_VS_H */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index f18ad75a47d..1ae6de70fee 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -35,67 +35,10 @@ extern "C" {
#include "pipe/p_state.h"
#include "pipe/internal/p_winsys_screen.h"
-struct r300_winsys {
- /* Parent class */
- struct pipe_winsys base;
-
- /* Opaque Radeon-specific winsys object. */
- void* radeon_winsys;
-
- /* PCI ID */
- uint32_t pci_id;
-
- /* GB pipe count */
- uint32_t gb_pipes;
-
- /* GART size. */
- uint32_t gart_size;
-
- /* VRAM size. */
- uint32_t vram_size;
-
- /* Add a pipe_buffer to the list of buffer objects to validate. */
- boolean (*add_buffer)(struct r300_winsys* winsys,
- struct pipe_buffer* pbuffer,
- uint32_t rd,
- uint32_t wd);
-
- /* Revalidate all currently setup pipe_buffers.
- * Returns TRUE if a flush is required. */
- boolean (*validate)(struct r300_winsys* winsys);
-
- /* Check to see if there's room for commands. */
- boolean (*check_cs)(struct r300_winsys* winsys, int size);
-
- /* Start a command emit. */
- void (*begin_cs)(struct r300_winsys* winsys,
- int size,
- const char* file,
- const char* function,
- int line);
-
- /* Write a dword to the command buffer. */
- void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword);
-
- /* Write a relocated dword to the command buffer. */
- void (*write_cs_reloc)(struct r300_winsys* winsys,
- struct pipe_buffer* bo,
- uint32_t rd,
- uint32_t wd,
- uint32_t flags);
-
- /* Finish a command emit. */
- void (*end_cs)(struct r300_winsys* winsys,
- const char* file,
- const char* function,
- int line);
-
- /* Flush the CS. */
- void (*flush_cs)(struct r300_winsys* winsys);
-};
+#include "radeon_winsys.h"
struct pipe_context* r300_create_context(struct pipe_screen* screen,
- struct r300_winsys* r300_winsys);
+ struct radeon_winsys* radeon_winsys);
boolean r300_get_texture_buffer(struct pipe_texture* texture,
struct pipe_buffer** buffer,
diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c
deleted file mode 100644
index c1c1194d58e..00000000000
--- a/src/gallium/drivers/r300/r3xx_fs.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r3xx_fs.h"
-
-#include "r300_reg.h"
-
-struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = {
- .code.r300.alu.length = 1,
- .code.r300.tex.length = 0,
-
- .code.r300.config = 0,
- .code.r300.pixsize = 0,
- .code.r300.code_offset = 0,
- .code.r300.code_addr[3] = R300_RGBA_OUT,
-
- .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
- R300_ALU_OUTC_CMP,
- .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
- R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
- .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
- R300_ALU_OUTA_CMP,
- .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
- R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
-
-struct rX00_fragment_program_code r3xx_texture_fragment_shader = {
- .code.r300.alu.length = 1,
- .code.r300.tex.length = 1,
-
- .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX,
- .code.r300.pixsize = 0,
- .code.r300.code_offset = 0,
- .code.r300.code_addr[3] = R300_RGBA_OUT,
-
- .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT,
-
- .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
- R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
- R300_ALU_OUTC_CMP,
- .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
- R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
- .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
- R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
- R300_ALU_OUTA_CMP,
- .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
- R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h
deleted file mode 100644
index 51cd245724d..00000000000
--- a/src/gallium/drivers/r300/r3xx_fs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R3XX_FS_H
-#define R3XX_FS_H
-
-#include "radeon_code.h"
-
-struct rX00_fragment_program_code r3xx_passthrough_fragment_shader;
-struct rX00_fragment_program_code r3xx_texture_fragment_shader;
-
-#endif /* R3XX_FS_H */
diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c
deleted file mode 100644
index f072deab0d9..00000000000
--- a/src/gallium/drivers/r300/r5xx_fs.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r5xx_fs.h"
-
-#include "r300_reg.h"
-
-/* XXX this all should find its way back to r300_reg */
-/* Swizzle tools */
-#define R500_SWIZZLE_ZERO 4
-#define R500_SWIZZLE_HALF 5
-#define R500_SWIZZLE_ONE 6
-#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
-#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
-#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
-#define R500_SWIZ_MOD_NEG 1
-#define R500_SWIZ_MOD_ABS 2
-#define R500_SWIZ_MOD_NEG_ABS 3
-/* Swizzles for inst2 */
-#define R500_SWIZ_TEX_STRQ(x) ((x) << 8)
-#define R500_SWIZ_TEX_RGBA(x) ((x) << 24)
-/* Swizzles for inst3 */
-#define R500_SWIZ_RGB_A(x) ((x) << 2)
-#define R500_SWIZ_RGB_B(x) ((x) << 15)
-/* Swizzles for inst4 */
-#define R500_SWIZ_ALPHA_A(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_B(x) ((x) << 21)
-/* Swizzle for inst5 */
-#define R500_SWIZ_RGBA_C(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_C(x) ((x) << 27)
-/* Writemasks */
-#define R500_TEX_WMASK(x) ((x) << 11)
-#define R500_ALU_WMASK(x) ((x) << 11)
-#define R500_ALU_OMASK(x) ((x) << 15)
-#define R500_W_OMASK (1 << 31)
-
-struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = {
- .code.r500.max_temp_idx = 0,
- .code.r500.inst_end = 0,
-
- .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
- R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .code.r500.inst[0].inst1 =
- R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
- .code.r500.inst[0].inst2 =
- R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
- .code.r500.inst[0].inst3 =
- R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
- .code.r500.inst[0].inst4 =
- R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
- .code.r500.inst[0].inst5 =
- R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0,
-};
-
-struct rX00_fragment_program_code r5xx_texture_fragment_shader = {
- .code.r500.max_temp_idx = 0,
- .code.r500.inst_end = 1,
-
- .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED,
- .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A,
- .code.r500.inst[0].inst3 = 0x0,
- .code.r500.inst[0].inst4 = 0x0,
- .code.r500.inst[0].inst5 = 0x0,
-
- .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
- R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
- .code.r500.inst[1].inst1 =
- R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
- .code.r500.inst[1].inst2 =
- R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
- .code.r500.inst[1].inst3 =
- R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
- .code.r500.inst[1].inst4 =
- R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
- .code.r500.inst[1].inst5 =
- R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0,
-};
diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h
deleted file mode 100644
index a4addde32b2..00000000000
--- a/src/gallium/drivers/r300/r5xx_fs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- * Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R5XX_FS_H
-#define R5XX_FS_H
-
-#include "radeon_code.h"
-
-struct rX00_fragment_program_code r5xx_passthrough_fragment_shader;
-struct rX00_fragment_program_code r5xx_texture_fragment_shader;
-
-#endif /* R5XX_FS_H */
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index 8fac8e6e05f..5f130453c39 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,6 +36,7 @@
#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_tile_cache.h"
@@ -48,12 +49,16 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
double depth, unsigned stencil)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ union util_color uc;
unsigned cv;
uint i;
if (softpipe->no_rast)
return;
+ if (!softpipe_check_render_cond(softpipe))
+ return;
+
#if 0
softpipe_update_derived(softpipe); /* not needed?? */
#endif
@@ -62,12 +67,12 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
struct pipe_surface *ps = softpipe->framebuffer.cbufs[i];
- util_pack_color(rgba, ps->format, &cv);
- sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, cv);
+ util_pack_color(rgba, ps->format, &uc);
+ sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui);
#if !TILE_CLEAR_OPTIMIZATION
/* non-cached surface */
- pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv);
+ pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
#endif
}
}
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 94d000a5acc..f3ac6760db5 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe )
if (softpipe->draw)
draw_destroy( softpipe->draw );
- softpipe->quad.shade->destroy( softpipe->quad.shade );
- softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
- softpipe->quad.blend->destroy( softpipe->quad.blend );
+ softpipe->quad.shade->destroy( softpipe->quad.shade );
+ softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+ softpipe->quad.blend->destroy( softpipe->quad.blend );
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL);
}
+
sp_destroy_tile_cache(softpipe->zsbuf_cache);
pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL);
@@ -106,6 +107,11 @@ softpipe_destroy( struct pipe_context *pipe )
pipe_texture_reference(&softpipe->texture[i], NULL);
}
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ sp_destroy_tex_tile_cache(softpipe->vertex_tex_cache[i]);
+ pipe_texture_reference(&softpipe->vertex_textures[i], NULL);
+ }
+
for (i = 0; i < Elements(softpipe->constants); i++) {
if (softpipe->constants[i].buffer) {
pipe_buffer_reference(&softpipe->constants[i].buffer, NULL);
@@ -120,7 +126,7 @@ softpipe_destroy( struct pipe_context *pipe )
* if (the texture is being used as a framebuffer surface)
* return PIPE_REFERENCED_FOR_WRITE
* else if (the texture is a bound texture source)
- * return PIPE_REFERENCED_FOR_READ XXX not done yet
+ * return PIPE_REFERENCED_FOR_READ
* else
* return PIPE_UNREFERENCED
*/
@@ -132,6 +138,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
struct softpipe_context *softpipe = softpipe_context( pipe );
unsigned i;
+ /* check if any of the bound drawing surfaces are this texture */
if (softpipe->dirty_render_cache) {
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
if (softpipe->framebuffer.cbufs[i] &&
@@ -145,7 +152,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
}
}
- /* FIXME: we also need to do the same for the texture cache */
+ /* check if any of the tex_cache textures are this texture */
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (softpipe->tex_cache[i] &&
+ softpipe->tex_cache[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ;
+ }
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ if (softpipe->vertex_tex_cache[i] &&
+ softpipe->vertex_tex_cache[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ;
+ }
return PIPE_UNREFERENCED;
}
@@ -159,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
}
+static void
+softpipe_render_condition( struct pipe_context *pipe,
+ struct pipe_query *query,
+ uint mode )
+{
+ struct softpipe_context *softpipe = softpipe_context( pipe );
+
+ softpipe->render_cond_query = query;
+ softpipe->render_cond_mode = mode;
+}
+
+
+
struct pipe_context *
softpipe_create( struct pipe_screen *screen )
{
@@ -174,6 +204,7 @@ softpipe_create( struct pipe_screen *screen )
#endif
softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+ softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
softpipe->pipe.winsys = screen->winsys;
softpipe->pipe.screen = screen;
@@ -185,7 +216,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.delete_blend_state = softpipe_delete_blend_state;
softpipe->pipe.create_sampler_state = softpipe_create_sampler_state;
- softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states;
+ softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states;
+ softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states;
softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state;
softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state;
@@ -204,13 +236,18 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.bind_vs_state = softpipe_bind_vs_state;
softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
+ softpipe->pipe.create_gs_state = softpipe_create_gs_state;
+ softpipe->pipe.bind_gs_state = softpipe_bind_gs_state;
+ softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
+
softpipe->pipe.set_blend_color = softpipe_set_blend_color;
softpipe->pipe.set_clip_state = softpipe_set_clip_state;
softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple;
softpipe->pipe.set_scissor_state = softpipe_set_scissor_state;
- softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures;
+ softpipe->pipe.set_fragment_sampler_textures = softpipe_set_sampler_textures;
+ softpipe->pipe.set_vertex_sampler_textures = softpipe_set_vertex_sampler_textures;
softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
@@ -219,8 +256,6 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.draw_arrays = softpipe_draw_arrays;
softpipe->pipe.draw_elements = softpipe_draw_elements;
softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
- softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
-
softpipe->pipe.clear = softpipe_clear;
softpipe->pipe.flush = softpipe_flush;
@@ -230,6 +265,8 @@ softpipe_create( struct pipe_screen *screen )
softpipe_init_query_funcs( softpipe );
+ softpipe->pipe.render_condition = softpipe_render_condition;
+
/*
* Alloc caches for accessing drawing surfaces and textures.
* Must be before quad stage setup!
@@ -240,12 +277,14 @@ softpipe_create( struct pipe_screen *screen )
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen );
-
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen);
+ }
/* setup quad rendering stages */
- softpipe->quad.shade = sp_quad_shade_stage(softpipe);
- softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
- softpipe->quad.blend = sp_quad_blend_stage(softpipe);
+ softpipe->quad.shade = sp_quad_shade_stage(softpipe);
+ softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
+ softpipe->quad.blend = sp_quad_blend_stage(softpipe);
/*
@@ -256,7 +295,7 @@ softpipe_create( struct pipe_screen *screen )
goto fail;
draw_texture_samplers(softpipe->draw,
- PIPE_MAX_SAMPLERS,
+ PIPE_MAX_VERTEX_SAMPLERS,
(struct tgsi_sampler **)
softpipe->tgsi.vert_samplers_list);
@@ -275,7 +314,6 @@ softpipe_create( struct pipe_screen *screen )
draw_set_render(softpipe->draw, softpipe->vbuf_backend);
-
/* plug in AA line/point stages */
draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
@@ -291,4 +329,3 @@ softpipe_create( struct pipe_screen *screen )
softpipe_destroy(&softpipe->pipe);
return NULL;
}
-
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index a735573d6fb..73fa744f9d4 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -53,10 +53,12 @@ struct softpipe_context {
/** Constant state objects */
struct pipe_blend_state *blend;
struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_depth_stencil_alpha_state *depth_stencil;
struct pipe_rasterizer_state *rasterizer;
struct sp_fragment_shader *fs;
struct sp_vertex_shader *vs;
+ struct sp_geometry_shader *gs;
/** Other rendering state */
struct pipe_blend_color blend_color;
@@ -66,12 +68,15 @@ struct softpipe_context {
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned num_samplers;
unsigned num_textures;
+ unsigned num_vertex_samplers;
+ unsigned num_vertex_textures;
unsigned num_vertex_elements;
unsigned num_vertex_buffers;
@@ -111,6 +116,10 @@ struct softpipe_context {
unsigned line_stipple_counter;
+ /** Conditional query object and mode */
+ struct pipe_query *render_cond_query;
+ uint render_cond_mode;
+
/** Software quad rendering pipeline */
struct {
struct quad_stage *shade;
@@ -121,7 +130,7 @@ struct softpipe_context {
/** TGSI exec things */
struct {
- struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS];
+ struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS];
} tgsi;
@@ -139,9 +148,11 @@ struct softpipe_context {
unsigned tex_timestamp;
struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+ struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
unsigned use_sse : 1;
unsigned dump_fs : 1;
+ unsigned dump_gs : 1;
unsigned no_rast : 1;
};
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index d4045816d03..03d35fb3cb5 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -38,6 +38,7 @@
#include "util/u_prim.h"
#include "sp_context.h"
+#include "sp_query.h"
#include "sp_state.h"
#include "draw/draw_context.h"
@@ -48,7 +49,7 @@ static void
softpipe_map_constant_buffers(struct softpipe_context *sp)
{
struct pipe_winsys *ws = sp->pipe.winsys;
- uint i, size;
+ uint i, vssize, gssize;
for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i].buffer && sp->constants[i].buffer->size)
@@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
}
if (sp->constants[PIPE_SHADER_VERTEX].buffer)
- size = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
+ vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
else
- size = 0;
+ vssize = 0;
- draw_set_mapped_constant_buffer(sp->draw,
+ if (sp->constants[PIPE_SHADER_GEOMETRY].buffer)
+ gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size;
+ else
+ gssize = 0;
+
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
sp->mapped_constants[PIPE_SHADER_VERTEX],
- size);
+ vssize);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY,
+ sp->mapped_constants[PIPE_SHADER_GEOMETRY],
+ gssize);
}
@@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
*/
draw_flush(sp->draw);
- draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0);
+ draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0);
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < PIPE_SHADER_TYPES; i++) {
if (sp->constants[i].buffer && sp->constants[i].buffer->size)
ws->buffer_unmap(ws, sp->constants[i].buffer);
sp->mapped_constants[i] = NULL;
@@ -88,11 +98,11 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
}
-boolean
+void
softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
{
- return softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+ softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
}
@@ -101,7 +111,7 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
* Basically, map the vertex buffers (and drawing surfaces), then hand off
* the drawing to the 'draw' module.
*/
-boolean
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -113,6 +123,9 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
struct draw_context *draw = sp->draw;
unsigned i;
+ if (!softpipe_check_render_cond(sp))
+ return;
+
sp->reduced_api_prim = u_reduced_prim(mode);
if (sp->dirty)
@@ -168,27 +181,17 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
softpipe_unmap_constant_buffers(sp);
sp->dirty_render_cache = TRUE;
-
- return TRUE;
}
-boolean
+void
softpipe_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- return softpipe_draw_range_elements( pipe, indexBuffer,
- indexSize,
- 0, 0xffffffff,
- mode, start, count );
-}
-
-
-void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
- struct softpipe_context *sp = softpipe_context(pipe);
- draw_set_edgeflags(sp->draw, edgeflags);
+ softpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize,
+ 0, 0xffffffff,
+ mode, start, count );
}
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index e38b767cf2c..75dac810a12 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -55,6 +55,9 @@ softpipe_flush( struct pipe_context *pipe,
for (i = 0; i < softpipe->num_textures; i++) {
sp_flush_tex_tile_cache(softpipe->tex_cache[i]);
}
+ for (i = 0; i < softpipe->num_vertex_textures; i++) {
+ sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]);
+ }
}
if (flags & PIPE_FLUSH_SWAPBUFFERS) {
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 4076114d392..27fa126b7c3 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -126,7 +126,10 @@ exec_run( const struct sp_fragment_shader *base,
setup_pos_vector(quad->posCoef,
(float)quad->input.x0, (float)quad->input.y0,
&machine->QuadPos);
-
+
+ /* convert 0 to 1.0 and 1 to -1.0 */
+ machine->Face = (float) (quad->input.facing * -2 + 1);
+
quad->inout.mask &= tgsi_exec_machine_run( machine );
if (quad->inout.mask == 0)
return FALSE;
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 5fbac06a535..7f573aef3c3 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr,
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+ (void) cvbr;
/* do nothing */
}
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index e243c63fa23..d9babe81dad 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs,
static const float zero[4] = { 0, 0, 0, 0 };
static const float one[4] = { 1, 1, 1, 1 };
struct softpipe_context *softpipe = qs->softpipe;
- float source[4][QUAD_SIZE];
+ float source[4][QUAD_SIZE] = { { 0 } };
/*
* Compute src/first term RGB
@@ -478,7 +478,15 @@ blend_quad(struct quad_stage *qs,
VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
+ {
+ const float *alpha = quadColor[3];
+ float diff[4], temp[4];
+ VEC4_SUB(diff, one, dest[3]);
+ VEC4_MIN(temp, alpha, diff);
+ VEC4_MUL(dest[0], quadColor[0], temp); /* R */
+ VEC4_MUL(dest[1], quadColor[1], temp); /* G */
+ VEC4_MUL(dest[2], quadColor[2], temp); /* B */
+ }
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
{
@@ -600,7 +608,7 @@ blend_quad(struct quad_stage *qs,
VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
break;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- assert(0); /* illegal */
+ /* dest = dest * 1 NO-OP, leave dest as-is */
break;
case PIPE_BLENDFACTOR_CONST_COLOR:
/* fall-through */
@@ -946,15 +954,15 @@ choose_blend_quad(struct quad_stage *qs,
qs->run = blend_noop;
}
else if (!softpipe->blend->logicop_enable &&
- softpipe->blend->colormask == 0xf)
+ softpipe->blend->colormask == 0xf &&
+ softpipe->framebuffer.nr_cbufs == 1)
{
if (!blend->blend_enable) {
qs->run = single_output_color;
}
else if (blend->rgb_src_factor == blend->alpha_src_factor &&
blend->rgb_dst_factor == blend->alpha_dst_factor &&
- blend->rgb_func == blend->alpha_func &&
- softpipe->framebuffer.nr_cbufs == 1)
+ blend->rgb_func == blend->alpha_func)
{
if (blend->alpha_func == PIPE_BLEND_ADD) {
if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 379cf4ad064..4ef5d9f7b1d 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe,
}
+/**
+ * Called by rendering function to check rendering is conditional.
+ * \return TRUE if we should render, FALSE if we should skip rendering
+ */
+boolean
+softpipe_check_render_cond(struct softpipe_context *sp)
+{
+ struct pipe_context *pipe = &sp->pipe;
+ boolean b, wait;
+ uint64_t result;
+
+ if (!sp->render_cond_query) {
+ return TRUE; /* no query predicate, draw normally */
+ }
+
+ wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+ sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+ b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result);
+ if (b)
+ return result > 0;
+ else
+ return TRUE;
+}
+
+
void softpipe_init_query_funcs(struct softpipe_context *softpipe )
{
softpipe->pipe.create_query = softpipe_create_query;
diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h
index 05060a45759..736c033897e 100644
--- a/src/gallium/drivers/softpipe/sp_query.h
+++ b/src/gallium/drivers/softpipe/sp_query.h
@@ -32,6 +32,10 @@
#ifndef SP_QUERY_H
#define SP_QUERY_H
+extern boolean
+softpipe_check_render_cond(struct softpipe_context *sp);
+
+
struct softpipe_context;
extern void softpipe_init_query_funcs(struct softpipe_context * );
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 81fb7aa20c6..bd3532de4f4 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -58,7 +58,9 @@ softpipe_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return PIPE_MAX_SAMPLERS;
+ return PIPE_MAX_VERTEX_SAMPLERS;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -143,6 +145,11 @@ softpipe_is_format_supported( struct pipe_screen *screen,
case PIPE_FORMAT_DXT3_RGBA:
case PIPE_FORMAT_DXT5_RGBA:
case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_R8G8_SNORM:
+ case PIPE_FORMAT_B6UG5SR5S_NORM:
+ case PIPE_FORMAT_X8UB8UG8SR8S_NORM:
+ case PIPE_FORMAT_A8B8G8R8_SNORM:
+ case PIPE_FORMAT_NONE:
return FALSE;
default:
return TRUE;
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index e55e209fd15..3da75364c5d 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -87,6 +87,8 @@ struct setup_context {
float oneoverarea;
int facing;
+ float pixel_offset;
+
struct quad_header quad[MAX_QUADS];
struct quad_header *quad_ptrs[MAX_QUADS];
unsigned count;
@@ -106,6 +108,7 @@ struct setup_context {
#endif
unsigned winding; /* which winding to cull */
+ unsigned nr_vertex_attrs;
};
@@ -268,8 +271,8 @@ static void print_vertex(const struct setup_context *setup,
const float (*v)[4])
{
int i;
- debug_printf(" Vertex: (%p)\n", v);
- for (i = 0; i < setup->quad[0].nr_attrs; i++) {
+ debug_printf(" Vertex: (%p)\n", (void *) v);
+ for (i = 0; i < setup->nr_vertex_attrs; i++) {
debug_printf(" %d: %f %f %f %f\n", i,
v[i][0], v[i][1], v[i][2], v[i][3]);
if (util_is_inf_or_nan(v[i][0])) {
@@ -378,6 +381,16 @@ static boolean setup_sort_vertices( struct setup_context *setup,
((det > 0.0) ^
(setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW));
+ /* Prepare pixel offset for rasterisation:
+ * - pixel center (0.5, 0.5) for GL, or
+ * - assume (0.0, 0.0) for other APIs.
+ */
+ if (setup->softpipe->rasterizer->gl_rasterization_rules) {
+ setup->pixel_offset = 0.5f;
+ } else {
+ setup->pixel_offset = 0.0f;
+ }
+
return TRUE;
}
@@ -426,7 +439,7 @@ static void tri_linear_coeff( struct setup_context *setup,
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
+ * pixel centers, in other words (pixel_offset, pixel_offset).
*
* this is neat but unfortunately not a good way to do things for
* triangles with very large values of dadx or dady as it will
@@ -437,8 +450,8 @@ static void tri_linear_coeff( struct setup_context *setup,
* instead - i'll switch to this later.
*/
coef->a0[i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
/*
debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -486,8 +499,8 @@ static void tri_persp_coeff( struct setup_context *setup,
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (mina -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
@@ -574,12 +587,12 @@ static void setup_tri_coefficients( struct setup_context *setup )
static void setup_tri_edges( struct setup_context *setup )
{
- float vmin_x = setup->vmin[0][0] + 0.5f;
- float vmid_x = setup->vmid[0][0] + 0.5f;
+ float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
+ float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
- float vmin_y = setup->vmin[0][1] - 0.5f;
- float vmid_y = setup->vmid[0][1] - 0.5f;
- float vmax_y = setup->vmax[0][1] - 0.5f;
+ float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
+ float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
+ float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
setup->emaj.sy = ceilf(vmin_y);
setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
@@ -779,8 +792,8 @@ line_linear_coeff(const struct setup_context *setup,
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
@@ -802,8 +815,8 @@ line_persp_coeff(const struct setup_context *setup,
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
@@ -1254,6 +1267,9 @@ void sp_setup_prepare( struct setup_context *setup )
softpipe_update_derived(sp);
}
+ /* Note: nr_attrs is only used for debugging (vertex printing) */
+ setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
+
sp->quad.first->begin( sp->quad.first );
if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 77ee3c1136b..9b18dac67bd 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -50,6 +50,7 @@
#define SP_NEW_VERTEX 0x1000
#define SP_NEW_VS 0x2000
#define SP_NEW_QUERY 0x4000
+#define SP_NEW_GS 0x8000
struct tgsi_sampler;
@@ -90,6 +91,11 @@ struct sp_vertex_shader {
int max_sampler; /* -1 if no samplers */
};
+/** Subclass of pipe_shader_state */
+struct sp_geometry_shader {
+ struct pipe_shader_state shader;
+ struct draw_geometry_shader *draw_data;
+};
void *
@@ -104,6 +110,10 @@ void *
softpipe_create_sampler_state(struct pipe_context *,
const struct pipe_sampler_state *);
void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void
+softpipe_bind_vertex_sampler_states(struct pipe_context *,
+ unsigned num_samplers,
+ void **samplers);
void softpipe_delete_sampler_state(struct pipe_context *, void *);
void *
@@ -139,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *,
const struct pipe_shader_state *);
void softpipe_bind_vs_state(struct pipe_context *, void *);
void softpipe_delete_vs_state(struct pipe_context *, void *);
+void *softpipe_create_gs_state(struct pipe_context *,
+ const struct pipe_shader_state *);
+void softpipe_bind_gs_state(struct pipe_context *, void *);
+void softpipe_delete_gs_state(struct pipe_context *, void *);
void softpipe_set_polygon_stipple( struct pipe_context *,
const struct pipe_poly_stipple * );
@@ -150,6 +164,11 @@ void softpipe_set_sampler_textures( struct pipe_context *,
unsigned num,
struct pipe_texture ** );
+void
+softpipe_set_vertex_sampler_textures(struct pipe_context *,
+ unsigned num_textures,
+ struct pipe_texture **);
+
void softpipe_set_viewport_state( struct pipe_context *,
const struct pipe_viewport_state * );
@@ -165,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *,
void softpipe_update_derived( struct softpipe_context *softpipe );
-boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
- unsigned start, unsigned count);
+void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count);
-boolean softpipe_draw_elements(struct pipe_context *pipe,
- struct pipe_buffer *indexBuffer,
- unsigned indexSize,
- unsigned mode, unsigned start, unsigned count);
-boolean
+void softpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count);
+void
softpipe_draw_range_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer,
unsigned indexSize,
@@ -181,10 +200,6 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count);
void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
-void
softpipe_map_transfers(struct softpipe_context *sp);
void
diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index efed082f823..95ab3234337 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -29,6 +29,7 @@
*/
#include "util/u_memory.h"
+#include "draw/draw_context.h"
#include "sp_context.h"
#include "sp_state.h"
@@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend = (struct pipe_blend_state *)blend;
softpipe->dirty |= SP_NEW_BLEND;
@@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
softpipe->blend_color = *blend_color;
softpipe->dirty |= SP_NEW_BLEND;
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 1faeca1c2a3..f6856a5f691 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -66,10 +66,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
if (vinfo->num_attribs == 0) {
/* compute vertex layout now */
const struct sp_fragment_shader *spfs = softpipe->fs;
- const enum interp_mode colorInterp
- = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(softpipe->draw);
+ const uint num = draw_current_shader_outputs(softpipe->draw);
uint i;
/* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -108,36 +106,24 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
switch (spfs->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
- src = draw_find_vs_output(softpipe->draw,
- TGSI_SEMANTIC_POSITION, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+ interp = INTERP_POS;
break;
case TGSI_SEMANTIC_COLOR:
- src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR,
- spfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ if (softpipe->rasterizer->flatshade) {
+ interp = INTERP_CONSTANT;
+ }
break;
-
- case TGSI_SEMANTIC_FOG:
- src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
- break;
-
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_FACE:
- /* this includes texcoords and varying vars */
- src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
- spfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
- break;
-
- default:
- assert(0);
}
+
+ /* this includes texcoords and varying vars */
+ src = draw_find_shader_output(softpipe->draw,
+ spfs->info.input_semantic_name[i],
+ spfs->info.input_semantic_index[i]);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
- softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+ softpipe->psize_slot = draw_find_shader_output(softpipe->draw,
TGSI_SEMANTIC_PSIZE, 0);
if (softpipe->psize_slot > 0) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
@@ -227,6 +213,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
}
}
}
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i];
+
+ if (tc->texture) {
+ struct softpipe_texture *spt = softpipe_texture(tc->texture);
+
+ if (spt->timestamp != tc->timestamp) {
+ sp_tex_tile_cache_validate_texture(tc);
+ tc->timestamp = spt->timestamp;
+ }
+ }
+ }
}
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 256faa94b84..aa12bb215a8 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
- softpipe->fs = (struct sp_fragment_shader *) fs;
+ draw_flush(softpipe->draw);
+
+ if (softpipe->fs == fs)
+ return;
+
+ draw_flush(softpipe->draw);
+
+ softpipe->fs = fs;
softpipe->dirty |= SP_NEW_FS;
}
@@ -143,6 +150,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs;
draw_delete_vertex_shader(softpipe->draw, state->draw_data);
+ FREE( (void *)state->shader.tokens );
FREE( state );
}
@@ -158,9 +166,75 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ draw_flush(softpipe->draw);
+
/* note: reference counting */
pipe_buffer_reference(&softpipe->constants[shader].buffer,
buf ? buf->buffer : NULL);
softpipe->dirty |= SP_NEW_CONSTANTS;
}
+
+void *
+softpipe_create_gs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ struct sp_geometry_shader *state;
+
+ state = CALLOC_STRUCT(sp_geometry_shader);
+ if (state == NULL )
+ goto fail;
+
+ /* debug */
+ if (softpipe->dump_gs)
+ tgsi_dump(templ->tokens, 0);
+
+ /* copy shader tokens, the ones passed in will go away.
+ */
+ state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (state->shader.tokens == NULL)
+ goto fail;
+
+ state->draw_data = draw_create_geometry_shader(softpipe->draw, templ);
+ if (state->draw_data == NULL)
+ goto fail;
+
+ return state;
+
+fail:
+ if (state) {
+ FREE( (void *)state->shader.tokens );
+ FREE( state->draw_data );
+ FREE( state );
+ }
+ return NULL;
+}
+
+
+void
+softpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ softpipe->gs = (struct sp_geometry_shader *)gs;
+
+ draw_bind_geometry_shader(softpipe->draw,
+ (softpipe->gs ? softpipe->gs->draw_data : NULL));
+
+ softpipe->dirty |= SP_NEW_GS;
+}
+
+
+void
+softpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+
+ struct sp_geometry_shader *state =
+ (struct sp_geometry_shader *)gs;
+
+ draw_delete_geometry_shader(softpipe->draw,
+ (state) ? state->draw_data : 0);
+ FREE(state);
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
index 87b72196838..a5b00336d44 100644
--- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe,
}
void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
- void *setup)
+ void *rasterizer)
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ if (softpipe->rasterizer == rasterizer)
+ return;
+
/* pass-through to draw module */
- draw_set_rasterizer_state(softpipe->draw, setup);
+ draw_set_rasterizer_state(softpipe->draw, rasterizer);
- softpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+ softpipe->rasterizer = rasterizer;
softpipe->dirty |= SP_NEW_RASTERIZER;
}
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index db0b8ab76b1..ceb4e338f1a 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -94,6 +94,34 @@ softpipe_bind_sampler_states(struct pipe_context *pipe,
void
+softpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ unsigned i;
+
+ assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_samplers == softpipe->num_vertex_samplers &&
+ !memcmp(softpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+ return;
+
+ draw_flush(softpipe->draw);
+
+ for (i = 0; i < num_samplers; ++i)
+ softpipe->vertex_samplers[i] = samplers[i];
+ for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ softpipe->vertex_samplers[i] = NULL;
+
+ softpipe->num_vertex_samplers = num_samplers;
+
+ softpipe->dirty |= SP_NEW_SAMPLER;
+}
+
+
+void
softpipe_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
@@ -122,6 +150,37 @@ softpipe_set_sampler_textures(struct pipe_context *pipe,
}
+void
+softpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct softpipe_context *softpipe = softpipe_context(pipe);
+ uint i;
+
+ assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_textures == softpipe->num_vertex_textures &&
+ !memcmp(softpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+ return;
+ }
+
+ draw_flush(softpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_texture *tex = i < num_textures ? textures[i] : NULL;
+
+ pipe_texture_reference(&softpipe->vertex_textures[i], tex);
+ sp_tex_tile_cache_set_texture(softpipe->vertex_tex_cache[i], tex);
+ }
+
+ softpipe->num_vertex_textures = num_textures;
+
+ softpipe->dirty |= SP_NEW_TEXTURE;
+}
+
+
/**
* Find/create an sp_sampler_varient object for sampling the given texture,
* sampler and tex unit.
@@ -185,16 +244,16 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
* fragment programs.
*/
for (i = 0; i <= softpipe->vs->max_sampler; i++) {
- if (softpipe->sampler[i]) {
+ if (softpipe->vertex_samplers[i]) {
softpipe->tgsi.vert_samplers_list[i] =
get_sampler_varient( i,
- sp_sampler(softpipe->sampler[i]),
- softpipe->texture[i],
+ sp_sampler(softpipe->vertex_samplers[i]),
+ softpipe->vertex_textures[i],
TGSI_PROCESSOR_VERTEX );
sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i],
- softpipe->tex_cache[i],
- softpipe->texture[i] );
+ softpipe->vertex_tex_cache[i],
+ softpipe->vertex_textures[i] );
}
}
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index bc0e2011300..f6154109ea8 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -35,6 +35,8 @@
#include "draw/draw_context.h"
+#include "util/u_format.h"
+
/**
* XXX this might get moved someday
@@ -49,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
struct softpipe_context *sp = softpipe_context(pipe);
uint i;
+ draw_flush(sp->draw);
+
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
/* check if changing cbuf */
if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
@@ -80,8 +84,9 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
if (sp->framebuffer.zsbuf) {
int depth_bits;
double mrd;
- depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format,
- PIPE_FORMAT_COMP_Z);
+ depth_bits = util_format_get_component_bits(sp->framebuffer.zsbuf->format,
+ UTIL_FORMAT_COLORSPACE_ZS,
+ 0);
if (depth_bits > 16) {
mrd = 0.0000001;
}
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index c22ee86b66c..1ae8fecacf7 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2,7 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
- * Copyright 2008 VMware, Inc. All rights reserved.
+ * Copyright 2008-2010 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -514,21 +514,15 @@ static float
compute_lambda_1d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
- float rho = MAX2(dsdx, dsdy) * texture->width[0];
- float lambda;
-
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
+ float rho = MAX2(dsdx, dsdy) * texture->width0;
- return lambda;
+ return util_fast_log2(rho);
}
@@ -536,8 +530,7 @@ static float
compute_lambda_2d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
@@ -545,16 +538,11 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
- float maxx = MAX2(dsdx, dsdy) * texture->width[0];
- float maxy = MAX2(dtdx, dtdy) * texture->height[0];
+ float maxx = MAX2(dsdx, dsdy) * texture->width0;
+ float maxy = MAX2(dtdx, dtdy) * texture->height0;
float rho = MAX2(maxx, maxy);
- float lambda;
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
- return lambda;
+ return util_fast_log2(rho);
}
@@ -562,8 +550,7 @@ static float
compute_lambda_3d(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
@@ -573,34 +560,29 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
- float maxx = MAX2(dsdx, dsdy) * texture->width[0];
- float maxy = MAX2(dtdx, dtdy) * texture->height[0];
- float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
- float rho, lambda;
+ float maxx = MAX2(dsdx, dsdy) * texture->width0;
+ float maxy = MAX2(dtdx, dtdy) * texture->height0;
+ float maxz = MAX2(dpdx, dpdy) * texture->depth0;
+ float rho;
rho = MAX2(maxx, maxy);
rho = MAX2(rho, maxz);
- lambda = util_fast_log2(rho);
- lambda += lodbias + sampler->lod_bias;
- lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
- return lambda;
+ return util_fast_log2(rho);
}
/**
* Compute lambda for a vertex texture sampler.
- * Since there aren't derivatives to use, just return the LOD bias.
+ * Since there aren't derivatives to use, just return 0.
*/
static float
compute_lambda_vert(const struct sp_sampler_varient *samp,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias)
+ const float p[QUAD_SIZE])
{
- return lodbias;
+ return 0.0f;
}
@@ -644,8 +626,8 @@ get_texel_2d(const struct sp_sampler_varient *samp,
const struct pipe_texture *texture = samp->texture;
unsigned level = addr.bits.level;
- if (x < 0 || x >= (int) texture->width[level] ||
- y < 0 || y >= (int) texture->height[level]) {
+ if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+ y < 0 || y >= (int) u_minify(texture->height0, level)) {
return samp->sampler->border_color;
}
else {
@@ -737,9 +719,9 @@ get_texel_3d(const struct sp_sampler_varient *samp,
const struct pipe_texture *texture = samp->texture;
unsigned level = addr.bits.level;
- if (x < 0 || x >= (int) texture->width[level] ||
- y < 0 || y >= (int) texture->height[level] ||
- z < 0 || z >= (int) texture->depth[level]) {
+ if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+ y < 0 || y >= (int) u_minify(texture->height0, level) ||
+ z < 0 || z >= (int) u_minify(texture->depth0, level)) {
return samp->sampler->border_color;
}
else {
@@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -925,7 +911,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
+ width = u_minify(texture->width0, level0);
assert(width > 0);
@@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -961,8 +948,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
assert(height > 0);
@@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1008,8 +996,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
assert(height > 0);
@@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1046,9 +1035,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
- depth = texture->depth[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+ depth = u_minify(texture->depth0, level0);
assert(width > 0);
assert(height > 0);
@@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1088,7 +1078,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
+ width = u_minify(texture->width0, level0);
assert(width > 0);
@@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1127,8 +1118,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
assert(height > 0);
@@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1174,8 +1166,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
assert(height > 0);
@@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1221,9 +1214,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
union tex_tile_address addr;
level0 = samp->level;
- width = texture->width[level0];
- height = texture->height[level0];
- depth = texture->depth[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+ depth = u_minify(texture->depth0, level0);
addr.value = 0;
addr.bits.level = level0;
@@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
}
+/* Calculate level of detail for every fragment.
+ * Note that lambda has already been biased by global LOD bias.
+ */
+static INLINE void
+compute_lod(const struct pipe_sampler_state *sampler,
+ const float biased_lambda,
+ const float lodbias[QUAD_SIZE],
+ float lod[QUAD_SIZE])
+{
+ uint i;
+
+ for (i = 0; i < QUAD_SIZE; i++) {
+ lod[i] = biased_lambda + lodbias[i];
+ lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
+ }
+}
+
+
static void
mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
int level0;
float lambda;
+ float lod[QUAD_SIZE];
+
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
- lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
level0 = (int)lambda;
if (lambda < 0.0) {
samp->level = 0;
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else if (level0 >= texture->last_level) {
samp->level = texture->last_level;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
float levelBlend = lambda - level0;
@@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
int c,j;
samp->level = level0;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
samp->level = level0+1;
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
for (j = 0; j < QUAD_SIZE; j++) {
for (c = 0; c < 4; c++) {
@@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
float lambda;
+ float lod[QUAD_SIZE];
- lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
if (lambda < 0.0) {
samp->level = 0;
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
samp->level = (int)(lambda + 0.5) ;
samp->level = MIN2(samp->level, (int)texture->last_level);
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
#if 0
@@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
- float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+ float lambda;
+ float lod[QUAD_SIZE];
+
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
if (lambda < 0.0) {
- samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
- samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+ samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
}
@@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT(
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
int level0;
float lambda;
+ float lod[QUAD_SIZE];
- lambda = compute_lambda_2d(samp, s, t, p, lodbias);
+ if (control == tgsi_sampler_lod_bias) {
+ lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+ compute_lod(samp->sampler, lambda, c0, lod);
+ } else {
+ assert(control == tgsi_sampler_lod_explicit);
+
+ memcpy(lod, c0, sizeof(lod));
+ }
+
+ /* XXX: Take into account all lod values.
+ */
+ lambda = lod[0];
level0 = (int)lambda;
/* Catches both negative and large values of level0:
@@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT(
else
samp->level = texture->last_level;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
}
else {
float levelBlend = lambda - level0;
@@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT(
int c,j;
samp->level = level0;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
samp->level = level0+1;
- img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
+ img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
for (j = 0; j < QUAD_SIZE; j++) {
for (c = 0; c < 4; c++) {
@@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
int j, k0, k1, k2, k3;
float val;
- samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
+ samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
/**
* Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE])
{
struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
* is not active, this will point somewhere deeper into the
* pipeline, eg. to mip_filter or even img_filter.
*/
- samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
+ samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba);
}
@@ -1778,8 +1845,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
samp->texture = texture;
samp->cache = tex_cache;
- samp->xpot = util_unsigned_logbase2( texture->width[0] );
- samp->ypot = util_unsigned_logbase2( texture->height[0] );
+ samp->xpot = util_unsigned_logbase2( texture->width0 );
+ samp->ypot = util_unsigned_logbase2( texture->height0 );
samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
}
@@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
break;
}
- if (sampler->compare_mode != FALSE) {
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
samp->compare = sample_compare;
}
else {
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index b0797711d37..b6e66c998ae 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -2,6 +2,7 @@
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2010 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4],
typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias);
+ const float p[QUAD_SIZE]);
typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
const float s[QUAD_SIZE],
const float t[QUAD_SIZE],
const float p[QUAD_SIZE],
- float lodbias,
+ const float c0[QUAD_SIZE],
+ enum tgsi_sampler_control control,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 407a22a9f4b..e50a76a73bc 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -35,6 +35,7 @@
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "util/u_tile.h"
+#include "util/u_math.h"
#include "sp_context.h"
#include "sp_surface.h"
#include "sp_texture.h"
@@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
addr.bits.level,
addr.bits.z,
PIPE_TRANSFER_READ, 0, 0,
- tc->texture->width[addr.bits.level],
- tc->texture->height[addr.bits.level]);
-
+ u_minify(tc->texture->width0, addr.bits.level),
+ u_minify(tc->texture->height0, addr.bits.level));
+
tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
tc->tex_face = addr.bits.face;
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 7caf2928b4b..a9436a33942 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -32,6 +32,8 @@
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
+
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
@@ -52,29 +54,28 @@ softpipe_texture_layout(struct pipe_screen *screen,
{
struct pipe_texture *pt = &spt->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
unsigned buffer_size = 0;
+ pt->width0 = width;
+ pt->height0 = height;
+ pt->depth0 = depth;
+
for (level = 0; level <= pt->last_level; level++) {
- pt->width[level] = width;
- pt->height[level] = height;
- pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
- spt->stride[level] = pt->nblocksx[level]*pt->block.size;
+ spt->stride[level] = util_format_get_stride(pt->format, width);
spt->level_offset[level] = buffer_size;
- buffer_size += (pt->nblocksy[level] *
+ buffer_size += (util_format_get_nblocksy(pt->format, height) *
((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
spt->stride[level]);
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
}
spt->buffer = screen->buffer_create(screen, 32,
@@ -96,12 +97,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
PIPE_BUFFER_USAGE_GPU_READ_WRITE);
unsigned tex_usage = spt->base.tex_usage;
- spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
- spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
-
spt->buffer = screen->surface_buffer_create( screen,
- spt->base.width[0],
- spt->base.height[0],
+ spt->base.width0,
+ spt->base.height0,
spt->base.format,
usage,
tex_usage,
@@ -126,9 +124,9 @@ softpipe_texture_create(struct pipe_screen *screen,
pipe_reference_init(&spt->base.reference, 1);
spt->base.screen = screen;
- spt->pot = (util_is_power_of_two(template->width[0]) &&
- util_is_power_of_two(template->height[0]) &&
- util_is_power_of_two(template->depth[0]));
+ spt->pot = (util_is_power_of_two(template->width0) &&
+ util_is_power_of_two(template->height0) &&
+ util_is_power_of_two(template->depth0));
if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
PIPE_TEXTURE_USAGE_PRIMARY)) {
@@ -163,7 +161,7 @@ softpipe_texture_blanket(struct pipe_screen * screen,
/* Only supports one type */
if (base->target != PIPE_TEXTURE_2D ||
base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1) {
return NULL;
}
@@ -174,8 +172,6 @@ softpipe_texture_blanket(struct pipe_screen * screen,
spt->base = *base;
pipe_reference_init(&spt->base.reference, 1);
spt->base.screen = screen;
- spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
- spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
spt->stride[0] = stride[0];
pipe_buffer_reference(&spt->buffer, buffer);
@@ -213,8 +209,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
pipe_reference_init(&ps->reference, 1);
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->offset = spt->level_offset[level];
ps->usage = usage;
@@ -243,10 +239,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
ps->zslice = zslice;
if (pt->target == PIPE_TEXTURE_CUBE) {
- ps->offset += face * pt->nblocksy[level] * spt->stride[level];
+ ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+ spt->stride[level];
}
else if (pt->target == PIPE_TEXTURE_3D) {
- ps->offset += zslice * pt->nblocksy[level] * spt->stride[level];
+ ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+ spt->stride[level];
}
else {
assert(face == 0);
@@ -301,15 +299,12 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
spt = CALLOC_STRUCT(softpipe_transfer);
if (spt) {
struct pipe_transfer *pt = &spt->base;
+ int nblocksy = util_format_get_nblocksy(texture->format, u_minify(texture->height0, level));
pipe_texture_reference(&pt->texture, texture);
- pt->format = texture->format;
- pt->block = texture->block;
pt->x = x;
pt->y = y;
pt->width = w;
pt->height = h;
- pt->nblocksx = texture->nblocksx[level];
- pt->nblocksy = texture->nblocksy[level];
pt->stride = sptex->stride[level];
pt->usage = usage;
pt->face = face;
@@ -319,10 +314,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
spt->offset = sptex->level_offset[level];
if (texture->target == PIPE_TEXTURE_CUBE) {
- spt->offset += face * pt->nblocksy * pt->stride;
+ spt->offset += face * nblocksy * pt->stride;
}
else if (texture->target == PIPE_TEXTURE_3D) {
- spt->offset += zslice * pt->nblocksy * pt->stride;
+ spt->offset += zslice * nblocksy * pt->stride;
}
else {
assert(face == 0);
@@ -360,9 +355,11 @@ softpipe_transfer_map( struct pipe_screen *screen,
{
ubyte *map, *xfer_map;
struct softpipe_texture *spt;
+ enum pipe_format format;
assert(transfer->texture);
spt = softpipe_texture(transfer->texture);
+ format = transfer->texture->format;
map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer));
if (map == NULL)
@@ -379,8 +376,8 @@ softpipe_transfer_map( struct pipe_screen *screen,
}
xfer_map = map + softpipe_transfer(transfer)->offset +
- transfer->y / transfer->block.height * transfer->stride +
- transfer->x / transfer->block.width * transfer->block.size;
+ transfer->y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
/*printf("map = %p xfer map = %p\n", map, xfer_map);*/
return xfer_map;
}
@@ -434,10 +431,9 @@ softpipe_video_surface_create(struct pipe_screen *screen,
template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
template.last_level = 0;
/* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
- template.width[0] = util_next_power_of_two(width);
- template.height[0] = util_next_power_of_two(height);
- template.depth[0] = 1;
- pf_get_block(template.format, &template.block);
+ template.width0 = util_next_power_of_two(width);
+ template.height0 = util_next_power_of_two(height);
+ template.depth0 = 1;
template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
sp_vsfc->tex = screen->texture_create(screen, &template);
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 83fb4e0d151..112a6fe0cf3 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -33,6 +33,7 @@
*/
#include "pipe/p_inlines.h"
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_tile.h"
#include "sp_tile_cache.h"
@@ -97,7 +98,12 @@ sp_create_tile_cache( struct pipe_screen *screen )
}
tc->last_tile = &tc->entries[0]; /* any tile */
-#if TILE_CLEAR_OPTIMIZATION
+ /* XXX this code prevents valgrind warnings about use of uninitialized
+ * memory in programs that don't clear the surface before rendering.
+ * However, it breaks clearing in other situations (such as in
+ * progs/tests/drawbuffers, see bug 24402).
+ */
+#if 0 && TILE_CLEAR_OPTIMIZATION
/* set flags to indicate all the tiles are cleared */
memset(tc->clear_flags, 255, sizeof(tc->clear_flags));
#endif
@@ -233,7 +239,7 @@ clear_tile(struct softpipe_cached_tile *tile,
{
uint i, j;
- switch (pf_get_size(format)) {
+ switch (util_format_get_blocksize(format)) {
case 1:
memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE);
break;
@@ -279,8 +285,9 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
uint x, y;
uint numCleared = 0;
+ assert(pt->texture);
/* clear the scratch tile to the clear value */
- clear_tile(&tc->tile, pt->format, tc->clear_val);
+ clear_tile(&tc->tile, pt->texture->format, tc->clear_val);
/* push the tile to all positions marked as clear */
for (y = 0; y < h; y += TILE_SIZE) {
@@ -367,6 +374,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
if (addr.value != tile->addr.value) {
+ assert(pt->texture);
if (tile->addr.bits.invalid == 0) {
/* put dirty tile back in framebuffer */
if (tc->depth_stencil) {
@@ -390,10 +398,10 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
if (is_clear_flag_set(tc->clear_flags, addr)) {
/* don't get tile from framebuffer, just clear it */
if (tc->depth_stencil) {
- clear_tile(tile, pt->format, tc->clear_val);
+ clear_tile(tile, pt->texture->format, tc->clear_val);
}
else {
- clear_tile_rgba(tile, pt->format, tc->clear_color);
+ clear_tile_rgba(tile, pt->texture->format, tc->clear_color);
}
clear_clear_flag(tc->clear_flags, addr);
}
diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h
index 9e571862b75..f203ded29ee 100644
--- a/src/gallium/drivers/softpipe/sp_winsys.h
+++ b/src/gallium/drivers/softpipe/sp_winsys.h
@@ -34,15 +34,17 @@
#ifndef SP_WINSYS_H
#define SP_WINSYS_H
-
#ifdef __cplusplus
extern "C" {
#endif
+#include "pipe/p_defines.h"
struct pipe_screen;
struct pipe_winsys;
struct pipe_context;
+struct pipe_texture;
+struct pipe_buffer;
struct pipe_context *softpipe_create( struct pipe_screen * );
diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
new file mode 100644
index 00000000000..f3619081875
--- /dev/null
+++ b/src/gallium/drivers/svga/Makefile
@@ -0,0 +1,60 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = svga
+
+C_SOURCES = \
+ svgadump/svga_shader_dump.c \
+ svgadump/svga_shader_op.c \
+ svgadump/svga_dump.c \
+ svga_cmd.c \
+ svga_context.c \
+ svga_draw.c \
+ svga_draw_arrays.c \
+ svga_draw_elements.c \
+ svga_pipe_blend.c \
+ svga_pipe_blit.c \
+ svga_pipe_clear.c \
+ svga_pipe_constants.c \
+ svga_pipe_depthstencil.c \
+ svga_pipe_draw.c \
+ svga_pipe_flush.c \
+ svga_pipe_fs.c \
+ svga_pipe_misc.c \
+ svga_pipe_query.c \
+ svga_pipe_rasterizer.c \
+ svga_pipe_sampler.c \
+ svga_pipe_vertex.c \
+ svga_pipe_vs.c \
+ svga_screen.c \
+ svga_screen_buffer.c \
+ svga_screen_texture.c \
+ svga_screen_cache.c \
+ svga_state.c \
+ svga_state_need_swtnl.c \
+ svga_state_constants.c \
+ svga_state_framebuffer.c \
+ svga_state_rss.c \
+ svga_state_tss.c \
+ svga_state_vdecl.c \
+ svga_state_fs.c \
+ svga_state_vs.c \
+ svga_swtnl_backend.c \
+ svga_swtnl_draw.c \
+ svga_swtnl_state.c \
+ svga_tgsi.c \
+ svga_tgsi_decl_sm20.c \
+ svga_tgsi_decl_sm30.c \
+ svga_tgsi_insn.c
+
+LIBRARY_INCLUDES = \
+ -I$(TOP)/src/gallium/drivers/svga/include
+
+# With linux-debug we get a lots of warnings, filter out the bad flags.
+CFLAGS := $(filter-out -pedantic, $(filter-out -ansi, $(CFLAGS)))
+
+LIBRARY_DEFINES = \
+ -std=gnu99 -fvisibility=hidden \
+ -DHAVE_STDINT_H -DHAVE_SYS_TYPES_H
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
new file mode 100644
index 00000000000..737b791ceb0
--- /dev/null
+++ b/src/gallium/drivers/svga/SConscript
@@ -0,0 +1,72 @@
+Import('*')
+
+env = env.Clone()
+
+if env['platform'] in ['linux']:
+ env.Append(CCFLAGS = ['-fvisibility=hidden'])
+
+if env['gcc']:
+ env.Append(CPPDEFINES = [
+ 'HAVE_STDINT_H',
+ 'HAVE_SYS_TYPES_H',
+ ])
+
+env.Prepend(CPPPATH = [
+ 'include',
+])
+
+env.Append(CPPDEFINES = [
+])
+
+sources = [
+ 'svga_cmd.c',
+ 'svga_context.c',
+ 'svga_draw.c',
+ 'svga_draw_arrays.c',
+ 'svga_draw_elements.c',
+ 'svga_pipe_blend.c',
+ 'svga_pipe_blit.c',
+ 'svga_pipe_clear.c',
+ 'svga_pipe_constants.c',
+ 'svga_pipe_depthstencil.c',
+ 'svga_pipe_draw.c',
+ 'svga_pipe_flush.c',
+ 'svga_pipe_fs.c',
+ 'svga_pipe_misc.c',
+ 'svga_pipe_query.c',
+ 'svga_pipe_rasterizer.c',
+ 'svga_pipe_sampler.c',
+ 'svga_pipe_vertex.c',
+ 'svga_pipe_vs.c',
+ 'svga_screen.c',
+ 'svga_screen_buffer.c',
+ 'svga_screen_cache.c',
+ 'svga_screen_texture.c',
+ 'svga_state.c',
+ 'svga_state_constants.c',
+ 'svga_state_framebuffer.c',
+ 'svga_state_need_swtnl.c',
+ 'svga_state_rss.c',
+ 'svga_state_tss.c',
+ 'svga_state_vdecl.c',
+ 'svga_state_fs.c',
+ 'svga_state_vs.c',
+ 'svga_swtnl_backend.c',
+ 'svga_swtnl_draw.c',
+ 'svga_swtnl_state.c',
+ 'svga_tgsi.c',
+ 'svga_tgsi_decl_sm20.c',
+ 'svga_tgsi_decl_sm30.c',
+ 'svga_tgsi_insn.c',
+
+ 'svgadump/svga_dump.c',
+ 'svgadump/svga_shader_dump.c',
+ 'svgadump/svga_shader_op.c',
+]
+
+svga = env.ConvenienceLibrary(
+ target = 'svga',
+ source = sources,
+)
+
+Export('svga')
diff --git a/src/gallium/drivers/svga/include/README b/src/gallium/drivers/svga/include/README
new file mode 100644
index 00000000000..a0b8916104e
--- /dev/null
+++ b/src/gallium/drivers/svga/include/README
@@ -0,0 +1,3 @@
+This directory contains the headers from the VMware SVGA Device Developer Kit:
+
+ https://vmware-svga.svn.sourceforge.net/svnroot/vmware-svga/trunk/lib/vmware/
diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h
new file mode 100644
index 00000000000..714ce9f45fb
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_caps.h
@@ -0,0 +1,139 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_caps.h --
+ *
+ * Definitions for SVGA3D hardware capabilities. Capabilities
+ * are used to query for optional rendering features during
+ * driver initialization. The capability data is stored as very
+ * basic key/value dictionary within the "FIFO register" memory
+ * area at the beginning of BAR2.
+ *
+ * Note that these definitions are only for 3D capabilities.
+ * The SVGA device also has "device capabilities" and "FIFO
+ * capabilities", which are non-3D-specific and are stored as
+ * bitfields rather than key/value pairs.
+ */
+
+#ifndef _SVGA3D_CAPS_H_
+#define _SVGA3D_CAPS_H_
+
+#define SVGA_FIFO_3D_CAPS_SIZE (SVGA_FIFO_3D_CAPS_LAST - \
+ SVGA_FIFO_3D_CAPS + 1)
+
+
+/*
+ * SVGA3dCapsRecordType
+ *
+ * Record types that can be found in the caps block.
+ * Related record types are grouped together numerically so that
+ * SVGA3dCaps_FindRecord() can be applied on a range of record
+ * types.
+ */
+
+typedef enum {
+ SVGA3DCAPS_RECORD_UNKNOWN = 0,
+ SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100,
+ SVGA3DCAPS_RECORD_DEVCAPS = 0x100,
+ SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff,
+} SVGA3dCapsRecordType;
+
+
+/*
+ * SVGA3dCapsRecordHeader
+ *
+ * Header field leading each caps block record. Contains the offset (in
+ * register words, NOT bytes) to the next caps block record (or the end
+ * of caps block records which will be a zero word) and the record type
+ * as defined above.
+ */
+
+typedef
+struct SVGA3dCapsRecordHeader {
+ uint32 length;
+ SVGA3dCapsRecordType type;
+}
+SVGA3dCapsRecordHeader;
+
+
+/*
+ * SVGA3dCapsRecord
+ *
+ * Caps block record; "data" is a placeholder for the actual data structure
+ * contained within the record; for example a record containing a FOOBAR
+ * structure would be of size "sizeof(SVGA3dCapsRecordHeader) +
+ * sizeof(FOOBAR)".
+ */
+
+typedef
+struct SVGA3dCapsRecord {
+ SVGA3dCapsRecordHeader header;
+ uint32 data[1];
+}
+SVGA3dCapsRecord;
+
+
+typedef uint32 SVGA3dCapPair[2];
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dCaps_FindRecord
+ *
+ * Finds the record with the highest-valued type within the given range
+ * in the caps block.
+ *
+ * Result: pointer to found record, or NULL if not found.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dCapsRecord *
+SVGA3dCaps_FindRecord(const uint32 *capsBlock,
+ SVGA3dCapsRecordType recordTypeMin,
+ SVGA3dCapsRecordType recordTypeMax)
+{
+ SVGA3dCapsRecord *record, *found = NULL;
+ uint32 offset;
+
+ /*
+ * Search linearly through the caps block records for the specified type.
+ */
+ for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
+ record = (SVGA3dCapsRecord *) (capsBlock + offset);
+ if ((record->header.type >= recordTypeMin) &&
+ (record->header.type <= recordTypeMax) &&
+ (!found || (record->header.type > found->header.type))) {
+ found = record;
+ }
+ }
+
+ return found;
+}
+
+
+#endif // _SVGA3D_CAPS_H_
diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h
new file mode 100644
index 00000000000..77cb4533100
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_reg.h
@@ -0,0 +1,1793 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_reg.h --
+ *
+ * SVGA 3D hardware definitions
+ */
+
+#ifndef _SVGA3D_REG_H_
+#define _SVGA3D_REG_H_
+
+#include "svga_reg.h"
+
+
+/*
+ * 3D Hardware Version
+ *
+ * The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo
+ * register. Is set by the host and read by the guest. This lets
+ * us make new guest drivers which are backwards-compatible with old
+ * SVGA hardware revisions. It does not let us support old guest
+ * drivers. Good enough for now.
+ *
+ */
+
+#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor) & 0xFF))
+#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16)
+#define SVGA3D_MINOR_HWVERSION(version) ((version) & 0xFF)
+
+typedef enum {
+ SVGA3D_HWVERSION_WS5_RC1 = SVGA3D_MAKE_HWVERSION(0, 1),
+ SVGA3D_HWVERSION_WS5_RC2 = SVGA3D_MAKE_HWVERSION(0, 2),
+ SVGA3D_HWVERSION_WS51_RC1 = SVGA3D_MAKE_HWVERSION(0, 3),
+ SVGA3D_HWVERSION_WS6_B1 = SVGA3D_MAKE_HWVERSION(1, 1),
+ SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
+ SVGA3D_HWVERSION_WS65_B1 = SVGA3D_MAKE_HWVERSION(2, 0),
+ SVGA3D_HWVERSION_CURRENT = SVGA3D_HWVERSION_WS65_B1,
+} SVGA3dHardwareVersion;
+
+/*
+ * Generic Types
+ */
+
+typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
+#define SVGA3D_NUM_CLIPPLANES 6
+#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS 8
+
+
+/*
+ * Surface formats.
+ *
+ * If you modify this list, be sure to keep GLUtil.c in sync. It
+ * includes the internal format definition of each surface in
+ * GLUtil_ConvertSurfaceFormat, and it contains a table of
+ * human-readable names in GLUtil_GetFormatName.
+ */
+
+typedef enum SVGA3dSurfaceFormat {
+ SVGA3D_FORMAT_INVALID = 0,
+
+ SVGA3D_X8R8G8B8 = 1,
+ SVGA3D_A8R8G8B8 = 2,
+
+ SVGA3D_R5G6B5 = 3,
+ SVGA3D_X1R5G5B5 = 4,
+ SVGA3D_A1R5G5B5 = 5,
+ SVGA3D_A4R4G4B4 = 6,
+
+ SVGA3D_Z_D32 = 7,
+ SVGA3D_Z_D16 = 8,
+ SVGA3D_Z_D24S8 = 9,
+ SVGA3D_Z_D15S1 = 10,
+
+ SVGA3D_LUMINANCE8 = 11,
+ SVGA3D_LUMINANCE4_ALPHA4 = 12,
+ SVGA3D_LUMINANCE16 = 13,
+ SVGA3D_LUMINANCE8_ALPHA8 = 14,
+
+ SVGA3D_DXT1 = 15,
+ SVGA3D_DXT2 = 16,
+ SVGA3D_DXT3 = 17,
+ SVGA3D_DXT4 = 18,
+ SVGA3D_DXT5 = 19,
+
+ SVGA3D_BUMPU8V8 = 20,
+ SVGA3D_BUMPL6V5U5 = 21,
+ SVGA3D_BUMPX8L8V8U8 = 22,
+ SVGA3D_BUMPL8V8U8 = 23,
+
+ SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */
+ SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */
+
+ SVGA3D_A2R10G10B10 = 26,
+
+ /* signed formats */
+ SVGA3D_V8U8 = 27,
+ SVGA3D_Q8W8V8U8 = 28,
+ SVGA3D_CxV8U8 = 29,
+
+ /* mixed formats */
+ SVGA3D_X8L8V8U8 = 30,
+ SVGA3D_A2W10V10U10 = 31,
+
+ SVGA3D_ALPHA8 = 32,
+
+ /* Single- and dual-component floating point formats */
+ SVGA3D_R_S10E5 = 33,
+ SVGA3D_R_S23E8 = 34,
+ SVGA3D_RG_S10E5 = 35,
+ SVGA3D_RG_S23E8 = 36,
+
+ /*
+ * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
+ * the most efficient format to use when creating new surfaces
+ * expressly for index or vertex data.
+ */
+ SVGA3D_BUFFER = 37,
+
+ SVGA3D_Z_D24X8 = 38,
+
+ SVGA3D_V16U16 = 39,
+
+ SVGA3D_G16R16 = 40,
+ SVGA3D_A16B16G16R16 = 41,
+
+ /* Packed Video formats */
+ SVGA3D_UYVY = 42,
+ SVGA3D_YUY2 = 43,
+
+ SVGA3D_FORMAT_MAX
+} SVGA3dSurfaceFormat;
+
+typedef uint32 SVGA3dColor; /* a, r, g, b */
+
+/*
+ * These match the D3DFORMAT_OP definitions used by Direct3D. We need
+ * them so that we can query the host for what the supported surface
+ * operations are (when we're using the D3D backend, in particular),
+ * and so we can send those operations to the guest.
+ */
+typedef enum {
+ SVGA3DFORMAT_OP_TEXTURE = 0x00000001,
+ SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002,
+ SVGA3DFORMAT_OP_CUBETEXTURE = 0x00000004,
+ SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET = 0x00000008,
+ SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET = 0x00000010,
+ SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040,
+ SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080,
+
+/*
+ * This format can be used as a render target if the current display mode
+ * is the same depth if the alpha channel is ignored. e.g. if the device
+ * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the
+ * format op list entry for A8R8G8B8 should have this cap.
+ */
+ SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100,
+
+/*
+ * This format contains DirectDraw support (including Flip). This flag
+ * should not to be set on alpha formats.
+ */
+ SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400,
+
+/*
+ * The rasterizer can support some level of Direct3D support in this format
+ * and implies that the driver can create a Context in this mode (for some
+ * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE
+ * flag must also be set.
+ */
+ SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800,
+
+/*
+ * This is set for a private format when the driver has put the bpp in
+ * the structure.
+ */
+ SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000,
+
+/*
+ * Indicates that this format can be converted to any RGB format for which
+ * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified
+ */
+ SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000,
+
+/*
+ * Indicates that this format can be used to create offscreen plain surfaces.
+ */
+ SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000,
+
+/*
+ * Indicated that this format can be read as an SRGB texture (meaning that the
+ * sampler will linearize the looked up data)
+ */
+ SVGA3DFORMAT_OP_SRGBREAD = 0x00008000,
+
+/*
+ * Indicates that this format can be used in the bumpmap instructions
+ */
+ SVGA3DFORMAT_OP_BUMPMAP = 0x00010000,
+
+/*
+ * Indicates that this format can be sampled by the displacement map sampler
+ */
+ SVGA3DFORMAT_OP_DMAP = 0x00020000,
+
+/*
+ * Indicates that this format cannot be used with texture filtering
+ */
+ SVGA3DFORMAT_OP_NOFILTER = 0x00040000,
+
+/*
+ * Indicates that format conversions are supported to this RGB format if
+ * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format.
+ */
+ SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000,
+
+/*
+ * Indicated that this format can be written as an SRGB target (meaning that the
+ * pixel pipe will DE-linearize data on output to format)
+ */
+ SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000,
+
+/*
+ * Indicates that this format cannot be used with alpha blending
+ */
+ SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000,
+
+/*
+ * Indicates that the device can auto-generated sublevels for resources
+ * of this format
+ */
+ SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000,
+
+/*
+ * Indicates that this format can be used by vertex texture sampler
+ */
+ SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000,
+
+/*
+ * Indicates that this format supports neither texture coordinate wrap
+ * modes, nor mipmapping
+ */
+ SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000
+} SVGA3dFormatOp;
+
+/*
+ * This structure is a conversion of SVGA3DFORMAT_OP_*.
+ * Entries must be located at the same position.
+ */
+typedef union {
+ uint32 value;
+ struct {
+ uint32 texture : 1;
+ uint32 volumeTexture : 1;
+ uint32 cubeTexture : 1;
+ uint32 offscreenRenderTarget : 1;
+ uint32 sameFormatRenderTarget : 1;
+ uint32 unknown1 : 1;
+ uint32 zStencil : 1;
+ uint32 zStencilArbitraryDepth : 1;
+ uint32 sameFormatUpToAlpha : 1;
+ uint32 unknown2 : 1;
+ uint32 displayMode : 1;
+ uint32 acceleration3d : 1;
+ uint32 pixelSize : 1;
+ uint32 convertToARGB : 1;
+ uint32 offscreenPlain : 1;
+ uint32 sRGBRead : 1;
+ uint32 bumpMap : 1;
+ uint32 dmap : 1;
+ uint32 noFilter : 1;
+ uint32 memberOfGroupARGB : 1;
+ uint32 sRGBWrite : 1;
+ uint32 noAlphaBlend : 1;
+ uint32 autoGenMipMap : 1;
+ uint32 vertexTexture : 1;
+ uint32 noTexCoordWrapNorMip : 1;
+ };
+} SVGA3dSurfaceFormatCaps;
+
+/*
+ * SVGA_3D_CMD_SETRENDERSTATE Types. All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {
+ SVGA3D_RS_INVALID = 0,
+ SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */
+ SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */
+ SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */
+ SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */
+ SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */
+ SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */
+ SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */
+ SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */
+ SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */
+ SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */
+ SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */
+ SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */
+ SVGA3D_RS_STENCILREF = 13, /* uint32 */
+ SVGA3D_RS_STENCILMASK = 14, /* uint32 */
+ SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */
+ SVGA3D_RS_FOGSTART = 16, /* float */
+ SVGA3D_RS_FOGEND = 17, /* float */
+ SVGA3D_RS_FOGDENSITY = 18, /* float */
+ SVGA3D_RS_POINTSIZE = 19, /* float */
+ SVGA3D_RS_POINTSIZEMIN = 20, /* float */
+ SVGA3D_RS_POINTSIZEMAX = 21, /* float */
+ SVGA3D_RS_POINTSCALE_A = 22, /* float */
+ SVGA3D_RS_POINTSCALE_B = 23, /* float */
+ SVGA3D_RS_POINTSCALE_C = 24, /* float */
+ SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */
+ SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */
+ SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */
+ SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */
+ SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */
+ SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */
+ SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */
+ SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */
+ SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */
+ SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */
+ SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */
+ SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */
+ SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */
+ SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */
+ SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */
+ SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */
+ SVGA3D_RS_STENCILPASS = 41, /* SVGA3dStencilOp */
+ SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */
+ SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */
+ SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */
+ SVGA3D_RS_ZBIAS = 45, /* float */
+ SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */
+ SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */
+ SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */
+ SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */
+ SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */
+ SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */
+ SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */
+ SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */
+ SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */
+ SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */
+ SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */
+ SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */
+ SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */
+ SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */
+ SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */
+ SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */
+ SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */
+ SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */
+ SVGA3D_RS_DEPTHBIAS = 64, /* float */
+
+
+ /*
+ * Output Gamma Level
+ *
+ * Output gamma effects the gamma curve of colors that are output from the
+ * rendering pipeline. A value of 1.0 specifies a linear color space. If the
+ * value is <= 0.0, gamma correction is ignored and linear color space is
+ * used.
+ */
+
+ SVGA3D_RS_OUTPUTGAMMA = 65, /* float */
+ SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */
+ SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */
+ SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */
+ SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */
+ SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */
+ SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */
+ SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */
+ SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */
+ SVGA3D_RS_TWEENFACTOR = 88, /* float */
+ SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */
+ SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */
+ SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */
+ SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */
+ SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */
+ SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */
+ SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */
+ SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */
+ SVGA3D_RS_MAX
+} SVGA3dRenderStateName;
+
+typedef enum {
+ SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */
+ SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */
+ SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */
+} SVGA3dVertexMaterial;
+
+typedef enum {
+ SVGA3D_FILLMODE_INVALID = 0,
+ SVGA3D_FILLMODE_POINT = 1,
+ SVGA3D_FILLMODE_LINE = 2,
+ SVGA3D_FILLMODE_FILL = 3,
+ SVGA3D_FILLMODE_MAX
+} SVGA3dFillModeType;
+
+
+typedef
+union {
+ struct {
+ uint16 mode; /* SVGA3dFillModeType */
+ uint16 face; /* SVGA3dFace */
+ };
+ uint32 uintValue;
+} SVGA3dFillMode;
+
+typedef enum {
+ SVGA3D_SHADEMODE_INVALID = 0,
+ SVGA3D_SHADEMODE_FLAT = 1,
+ SVGA3D_SHADEMODE_SMOOTH = 2,
+ SVGA3D_SHADEMODE_PHONG = 3, /* Not supported */
+ SVGA3D_SHADEMODE_MAX
+} SVGA3dShadeMode;
+
+typedef
+union {
+ struct {
+ uint16 repeat;
+ uint16 pattern;
+ };
+ uint32 uintValue;
+} SVGA3dLinePattern;
+
+typedef enum {
+ SVGA3D_BLENDOP_INVALID = 0,
+ SVGA3D_BLENDOP_ZERO = 1,
+ SVGA3D_BLENDOP_ONE = 2,
+ SVGA3D_BLENDOP_SRCCOLOR = 3,
+ SVGA3D_BLENDOP_INVSRCCOLOR = 4,
+ SVGA3D_BLENDOP_SRCALPHA = 5,
+ SVGA3D_BLENDOP_INVSRCALPHA = 6,
+ SVGA3D_BLENDOP_DESTALPHA = 7,
+ SVGA3D_BLENDOP_INVDESTALPHA = 8,
+ SVGA3D_BLENDOP_DESTCOLOR = 9,
+ SVGA3D_BLENDOP_INVDESTCOLOR = 10,
+ SVGA3D_BLENDOP_SRCALPHASAT = 11,
+ SVGA3D_BLENDOP_BLENDFACTOR = 12,
+ SVGA3D_BLENDOP_INVBLENDFACTOR = 13,
+ SVGA3D_BLENDOP_MAX
+} SVGA3dBlendOp;
+
+typedef enum {
+ SVGA3D_BLENDEQ_INVALID = 0,
+ SVGA3D_BLENDEQ_ADD = 1,
+ SVGA3D_BLENDEQ_SUBTRACT = 2,
+ SVGA3D_BLENDEQ_REVSUBTRACT = 3,
+ SVGA3D_BLENDEQ_MINIMUM = 4,
+ SVGA3D_BLENDEQ_MAXIMUM = 5,
+ SVGA3D_BLENDEQ_MAX
+} SVGA3dBlendEquation;
+
+typedef enum {
+ SVGA3D_FRONTWINDING_INVALID = 0,
+ SVGA3D_FRONTWINDING_CW = 1,
+ SVGA3D_FRONTWINDING_CCW = 2,
+ SVGA3D_FRONTWINDING_MAX
+} SVGA3dFrontWinding;
+
+typedef enum {
+ SVGA3D_FACE_INVALID = 0,
+ SVGA3D_FACE_NONE = 1,
+ SVGA3D_FACE_FRONT = 2,
+ SVGA3D_FACE_BACK = 3,
+ SVGA3D_FACE_FRONT_BACK = 4,
+ SVGA3D_FACE_MAX
+} SVGA3dFace;
+
+/*
+ * The order and the values should not be changed
+ */
+
+typedef enum {
+ SVGA3D_CMP_INVALID = 0,
+ SVGA3D_CMP_NEVER = 1,
+ SVGA3D_CMP_LESS = 2,
+ SVGA3D_CMP_EQUAL = 3,
+ SVGA3D_CMP_LESSEQUAL = 4,
+ SVGA3D_CMP_GREATER = 5,
+ SVGA3D_CMP_NOTEQUAL = 6,
+ SVGA3D_CMP_GREATEREQUAL = 7,
+ SVGA3D_CMP_ALWAYS = 8,
+ SVGA3D_CMP_MAX
+} SVGA3dCmpFunc;
+
+/*
+ * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows
+ * the fog factor to be specified in the alpha component of the specular
+ * (a.k.a. secondary) vertex color.
+ */
+typedef enum {
+ SVGA3D_FOGFUNC_INVALID = 0,
+ SVGA3D_FOGFUNC_EXP = 1,
+ SVGA3D_FOGFUNC_EXP2 = 2,
+ SVGA3D_FOGFUNC_LINEAR = 3,
+ SVGA3D_FOGFUNC_PER_VERTEX = 4
+} SVGA3dFogFunction;
+
+/*
+ * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex
+ * or per-pixel basis.
+ */
+typedef enum {
+ SVGA3D_FOGTYPE_INVALID = 0,
+ SVGA3D_FOGTYPE_VERTEX = 1,
+ SVGA3D_FOGTYPE_PIXEL = 2,
+ SVGA3D_FOGTYPE_MAX = 3
+} SVGA3dFogType;
+
+/*
+ * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is
+ * computed using the eye Z value of each pixel (or vertex), whereas range-
+ * based fog is computed using the actual distance (range) to the eye.
+ */
+typedef enum {
+ SVGA3D_FOGBASE_INVALID = 0,
+ SVGA3D_FOGBASE_DEPTHBASED = 1,
+ SVGA3D_FOGBASE_RANGEBASED = 2,
+ SVGA3D_FOGBASE_MAX = 3
+} SVGA3dFogBase;
+
+typedef enum {
+ SVGA3D_STENCILOP_INVALID = 0,
+ SVGA3D_STENCILOP_KEEP = 1,
+ SVGA3D_STENCILOP_ZERO = 2,
+ SVGA3D_STENCILOP_REPLACE = 3,
+ SVGA3D_STENCILOP_INCRSAT = 4,
+ SVGA3D_STENCILOP_DECRSAT = 5,
+ SVGA3D_STENCILOP_INVERT = 6,
+ SVGA3D_STENCILOP_INCR = 7,
+ SVGA3D_STENCILOP_DECR = 8,
+ SVGA3D_STENCILOP_MAX
+} SVGA3dStencilOp;
+
+typedef enum {
+ SVGA3D_CLIPPLANE_0 = (1 << 0),
+ SVGA3D_CLIPPLANE_1 = (1 << 1),
+ SVGA3D_CLIPPLANE_2 = (1 << 2),
+ SVGA3D_CLIPPLANE_3 = (1 << 3),
+ SVGA3D_CLIPPLANE_4 = (1 << 4),
+ SVGA3D_CLIPPLANE_5 = (1 << 5),
+} SVGA3dClipPlanes;
+
+typedef enum {
+ SVGA3D_CLEAR_COLOR = 0x1,
+ SVGA3D_CLEAR_DEPTH = 0x2,
+ SVGA3D_CLEAR_STENCIL = 0x4
+} SVGA3dClearFlag;
+
+typedef enum {
+ SVGA3D_RT_DEPTH = 0,
+ SVGA3D_RT_STENCIL = 1,
+ SVGA3D_RT_COLOR0 = 2,
+ SVGA3D_RT_COLOR1 = 3,
+ SVGA3D_RT_COLOR2 = 4,
+ SVGA3D_RT_COLOR3 = 5,
+ SVGA3D_RT_COLOR4 = 6,
+ SVGA3D_RT_COLOR5 = 7,
+ SVGA3D_RT_COLOR6 = 8,
+ SVGA3D_RT_COLOR7 = 9,
+ SVGA3D_RT_MAX,
+ SVGA3D_RT_INVALID = ((uint32)-1),
+} SVGA3dRenderTargetType;
+
+#define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1)
+
+typedef
+union {
+ struct {
+ uint32 red : 1;
+ uint32 green : 1;
+ uint32 blue : 1;
+ uint32 alpha : 1;
+ };
+ uint32 uintValue;
+} SVGA3dColorMask;
+
+typedef enum {
+ SVGA3D_VBLEND_DISABLE = 0,
+ SVGA3D_VBLEND_1WEIGHT = 1,
+ SVGA3D_VBLEND_2WEIGHT = 2,
+ SVGA3D_VBLEND_3WEIGHT = 3,
+} SVGA3dVertexBlendFlags;
+
+typedef enum {
+ SVGA3D_WRAPCOORD_0 = 1 << 0,
+ SVGA3D_WRAPCOORD_1 = 1 << 1,
+ SVGA3D_WRAPCOORD_2 = 1 << 2,
+ SVGA3D_WRAPCOORD_3 = 1 << 3,
+ SVGA3D_WRAPCOORD_ALL = 0xF,
+} SVGA3dWrapFlags;
+
+/*
+ * SVGA_3D_CMD_TEXTURESTATE Types. All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {
+ SVGA3D_TS_INVALID = 0,
+ SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */
+ SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */
+ SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */
+ SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */
+ SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */
+ SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */
+ SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */
+ SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */
+ SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */
+ SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */
+ SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */
+ SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */
+ SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */
+ SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */
+ SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */
+ SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */
+ SVGA3D_TS_BUMPENVMAT00 = 17, /* float */
+ SVGA3D_TS_BUMPENVMAT01 = 18, /* float */
+ SVGA3D_TS_BUMPENVMAT10 = 19, /* float */
+ SVGA3D_TS_BUMPENVMAT11 = 20, /* float */
+ SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */
+ SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */
+ SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */
+ SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */
+
+
+ /*
+ * Sampler Gamma Level
+ *
+ * Sampler gamma effects the color of samples taken from the sampler. A
+ * value of 1.0 will produce linear samples. If the value is <= 0.0 the
+ * gamma value is ignored and a linear space is used.
+ */
+
+ SVGA3D_TS_GAMMA = 25, /* float */
+ SVGA3D_TS_BUMPENVLSCALE = 26, /* float */
+ SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */
+ SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */
+ SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */
+ SVGA3D_TS_MAX
+} SVGA3dTextureStateName;
+
+typedef enum {
+ SVGA3D_TC_INVALID = 0,
+ SVGA3D_TC_DISABLE = 1,
+ SVGA3D_TC_SELECTARG1 = 2,
+ SVGA3D_TC_SELECTARG2 = 3,
+ SVGA3D_TC_MODULATE = 4,
+ SVGA3D_TC_ADD = 5,
+ SVGA3D_TC_ADDSIGNED = 6,
+ SVGA3D_TC_SUBTRACT = 7,
+ SVGA3D_TC_BLENDTEXTUREALPHA = 8,
+ SVGA3D_TC_BLENDDIFFUSEALPHA = 9,
+ SVGA3D_TC_BLENDCURRENTALPHA = 10,
+ SVGA3D_TC_BLENDFACTORALPHA = 11,
+ SVGA3D_TC_MODULATE2X = 12,
+ SVGA3D_TC_MODULATE4X = 13,
+ SVGA3D_TC_DSDT = 14,
+ SVGA3D_TC_DOTPRODUCT3 = 15,
+ SVGA3D_TC_BLENDTEXTUREALPHAPM = 16,
+ SVGA3D_TC_ADDSIGNED2X = 17,
+ SVGA3D_TC_ADDSMOOTH = 18,
+ SVGA3D_TC_PREMODULATE = 19,
+ SVGA3D_TC_MODULATEALPHA_ADDCOLOR = 20,
+ SVGA3D_TC_MODULATECOLOR_ADDALPHA = 21,
+ SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22,
+ SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23,
+ SVGA3D_TC_BUMPENVMAPLUMINANCE = 24,
+ SVGA3D_TC_MULTIPLYADD = 25,
+ SVGA3D_TC_LERP = 26,
+ SVGA3D_TC_MAX
+} SVGA3dTextureCombiner;
+
+#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) (svga3d_tc_op ? (1 << (svga3d_tc_op - 1)) : 0)
+
+typedef enum {
+ SVGA3D_TEX_ADDRESS_INVALID = 0,
+ SVGA3D_TEX_ADDRESS_WRAP = 1,
+ SVGA3D_TEX_ADDRESS_MIRROR = 2,
+ SVGA3D_TEX_ADDRESS_CLAMP = 3,
+ SVGA3D_TEX_ADDRESS_BORDER = 4,
+ SVGA3D_TEX_ADDRESS_MIRRORONCE = 5,
+ SVGA3D_TEX_ADDRESS_EDGE = 6,
+ SVGA3D_TEX_ADDRESS_MAX
+} SVGA3dTextureAddress;
+
+/*
+ * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is
+ * disabled, and the rasterizer should use the magnification filter instead.
+ */
+typedef enum {
+ SVGA3D_TEX_FILTER_NONE = 0,
+ SVGA3D_TEX_FILTER_NEAREST = 1,
+ SVGA3D_TEX_FILTER_LINEAR = 2,
+ SVGA3D_TEX_FILTER_ANISOTROPIC = 3,
+ SVGA3D_TEX_FILTER_FLATCUBIC = 4, // Deprecated, not implemented
+ SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, // Deprecated, not implemented
+ SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, // Not currently implemented
+ SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, // Not currently implemented
+ SVGA3D_TEX_FILTER_MAX
+} SVGA3dTextureFilter;
+
+typedef enum {
+ SVGA3D_TEX_TRANSFORM_OFF = 0,
+ SVGA3D_TEX_TRANSFORM_S = (1 << 0),
+ SVGA3D_TEX_TRANSFORM_T = (1 << 1),
+ SVGA3D_TEX_TRANSFORM_R = (1 << 2),
+ SVGA3D_TEX_TRANSFORM_Q = (1 << 3),
+ SVGA3D_TEX_PROJECTED = (1 << 15),
+} SVGA3dTexTransformFlags;
+
+typedef enum {
+ SVGA3D_TEXCOORD_GEN_OFF = 0,
+ SVGA3D_TEXCOORD_GEN_EYE_POSITION = 1,
+ SVGA3D_TEXCOORD_GEN_EYE_NORMAL = 2,
+ SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3,
+ SVGA3D_TEXCOORD_GEN_SPHERE = 4,
+ SVGA3D_TEXCOORD_GEN_MAX
+} SVGA3dTextureCoordGen;
+
+/*
+ * Texture argument constants for texture combiner
+ */
+typedef enum {
+ SVGA3D_TA_INVALID = 0,
+ SVGA3D_TA_CONSTANT = 1,
+ SVGA3D_TA_PREVIOUS = 2,
+ SVGA3D_TA_DIFFUSE = 3,
+ SVGA3D_TA_TEXTURE = 4,
+ SVGA3D_TA_SPECULAR = 5,
+ SVGA3D_TA_MAX
+} SVGA3dTextureArgData;
+
+#define SVGA3D_TM_MASK_LEN 4
+
+/* Modifiers for texture argument constants defined above. */
+typedef enum {
+ SVGA3D_TM_NONE = 0,
+ SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN),
+ SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN),
+} SVGA3dTextureArgModifier;
+
+#define SVGA3D_INVALID_ID ((uint32)-1)
+#define SVGA3D_MAX_CLIP_PLANES 6
+
+/*
+ * This is the limit to the number of fixed-function texture
+ * transforms and texture coordinates we can support. It does *not*
+ * correspond to the number of texture image units (samplers) we
+ * support!
+ */
+#define SVGA3D_MAX_TEXTURE_COORDS 8
+
+/*
+ * Vertex declarations
+ *
+ * Notes:
+ *
+ * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you
+ * draw with any POSITIONT vertex arrays, the programmable vertex
+ * pipeline will be implicitly disabled. Drawing will take place as if
+ * no vertex shader was bound.
+ */
+
+typedef enum {
+ SVGA3D_DECLUSAGE_POSITION = 0,
+ SVGA3D_DECLUSAGE_BLENDWEIGHT, // 1
+ SVGA3D_DECLUSAGE_BLENDINDICES, // 2
+ SVGA3D_DECLUSAGE_NORMAL, // 3
+ SVGA3D_DECLUSAGE_PSIZE, // 4
+ SVGA3D_DECLUSAGE_TEXCOORD, // 5
+ SVGA3D_DECLUSAGE_TANGENT, // 6
+ SVGA3D_DECLUSAGE_BINORMAL, // 7
+ SVGA3D_DECLUSAGE_TESSFACTOR, // 8
+ SVGA3D_DECLUSAGE_POSITIONT, // 9
+ SVGA3D_DECLUSAGE_COLOR, // 10
+ SVGA3D_DECLUSAGE_FOG, // 11
+ SVGA3D_DECLUSAGE_DEPTH, // 12
+ SVGA3D_DECLUSAGE_SAMPLE, // 13
+ SVGA3D_DECLUSAGE_MAX
+} SVGA3dDeclUsage;
+
+typedef enum {
+ SVGA3D_DECLMETHOD_DEFAULT = 0,
+ SVGA3D_DECLMETHOD_PARTIALU,
+ SVGA3D_DECLMETHOD_PARTIALV,
+ SVGA3D_DECLMETHOD_CROSSUV, // Normal
+ SVGA3D_DECLMETHOD_UV,
+ SVGA3D_DECLMETHOD_LOOKUP, // Lookup a displacement map
+ SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+} SVGA3dDeclMethod;
+
+typedef enum {
+ SVGA3D_DECLTYPE_FLOAT1 = 0,
+ SVGA3D_DECLTYPE_FLOAT2 = 1,
+ SVGA3D_DECLTYPE_FLOAT3 = 2,
+ SVGA3D_DECLTYPE_FLOAT4 = 3,
+ SVGA3D_DECLTYPE_D3DCOLOR = 4,
+ SVGA3D_DECLTYPE_UBYTE4 = 5,
+ SVGA3D_DECLTYPE_SHORT2 = 6,
+ SVGA3D_DECLTYPE_SHORT4 = 7,
+ SVGA3D_DECLTYPE_UBYTE4N = 8,
+ SVGA3D_DECLTYPE_SHORT2N = 9,
+ SVGA3D_DECLTYPE_SHORT4N = 10,
+ SVGA3D_DECLTYPE_USHORT2N = 11,
+ SVGA3D_DECLTYPE_USHORT4N = 12,
+ SVGA3D_DECLTYPE_UDEC3 = 13,
+ SVGA3D_DECLTYPE_DEC3N = 14,
+ SVGA3D_DECLTYPE_FLOAT16_2 = 15,
+ SVGA3D_DECLTYPE_FLOAT16_4 = 16,
+ SVGA3D_DECLTYPE_MAX,
+} SVGA3dDeclType;
+
+/*
+ * This structure is used for the divisor for geometry instancing;
+ * it's a direct translation of the Direct3D equivalent.
+ */
+typedef union {
+ struct {
+ /*
+ * For index data, this number represents the number of instances to draw.
+ * For instance data, this number represents the number of
+ * instances/vertex in this stream
+ */
+ uint32 count : 30;
+
+ /*
+ * This is 1 if this is supposed to be the data that is repeated for
+ * every instance.
+ */
+ uint32 indexedData : 1;
+
+ /*
+ * This is 1 if this is supposed to be the per-instance data.
+ */
+ uint32 instanceData : 1;
+ };
+
+ uint32 value;
+} SVGA3dVertexDivisor;
+
+typedef enum {
+ SVGA3D_PRIMITIVE_INVALID = 0,
+ SVGA3D_PRIMITIVE_TRIANGLELIST = 1,
+ SVGA3D_PRIMITIVE_POINTLIST = 2,
+ SVGA3D_PRIMITIVE_LINELIST = 3,
+ SVGA3D_PRIMITIVE_LINESTRIP = 4,
+ SVGA3D_PRIMITIVE_TRIANGLESTRIP = 5,
+ SVGA3D_PRIMITIVE_TRIANGLEFAN = 6,
+ SVGA3D_PRIMITIVE_MAX
+} SVGA3dPrimitiveType;
+
+typedef enum {
+ SVGA3D_COORDINATE_INVALID = 0,
+ SVGA3D_COORDINATE_LEFTHANDED = 1,
+ SVGA3D_COORDINATE_RIGHTHANDED = 2,
+ SVGA3D_COORDINATE_MAX
+} SVGA3dCoordinateType;
+
+typedef enum {
+ SVGA3D_TRANSFORM_INVALID = 0,
+ SVGA3D_TRANSFORM_WORLD = 1,
+ SVGA3D_TRANSFORM_VIEW = 2,
+ SVGA3D_TRANSFORM_PROJECTION = 3,
+ SVGA3D_TRANSFORM_TEXTURE0 = 4,
+ SVGA3D_TRANSFORM_TEXTURE1 = 5,
+ SVGA3D_TRANSFORM_TEXTURE2 = 6,
+ SVGA3D_TRANSFORM_TEXTURE3 = 7,
+ SVGA3D_TRANSFORM_TEXTURE4 = 8,
+ SVGA3D_TRANSFORM_TEXTURE5 = 9,
+ SVGA3D_TRANSFORM_TEXTURE6 = 10,
+ SVGA3D_TRANSFORM_TEXTURE7 = 11,
+ SVGA3D_TRANSFORM_WORLD1 = 12,
+ SVGA3D_TRANSFORM_WORLD2 = 13,
+ SVGA3D_TRANSFORM_WORLD3 = 14,
+ SVGA3D_TRANSFORM_MAX
+} SVGA3dTransformType;
+
+typedef enum {
+ SVGA3D_LIGHTTYPE_INVALID = 0,
+ SVGA3D_LIGHTTYPE_POINT = 1,
+ SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */
+ SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */
+ SVGA3D_LIGHTTYPE_DIRECTIONAL = 4,
+ SVGA3D_LIGHTTYPE_MAX
+} SVGA3dLightType;
+
+typedef enum {
+ SVGA3D_CUBEFACE_POSX = 0,
+ SVGA3D_CUBEFACE_NEGX = 1,
+ SVGA3D_CUBEFACE_POSY = 2,
+ SVGA3D_CUBEFACE_NEGY = 3,
+ SVGA3D_CUBEFACE_POSZ = 4,
+ SVGA3D_CUBEFACE_NEGZ = 5,
+} SVGA3dCubeFace;
+
+typedef enum {
+ SVGA3D_SHADERTYPE_COMPILED_DX8 = 0,
+ SVGA3D_SHADERTYPE_VS = 1,
+ SVGA3D_SHADERTYPE_PS = 2,
+ SVGA3D_SHADERTYPE_MAX
+} SVGA3dShaderType;
+
+typedef enum {
+ SVGA3D_CONST_TYPE_FLOAT = 0,
+ SVGA3D_CONST_TYPE_INT = 1,
+ SVGA3D_CONST_TYPE_BOOL = 2,
+} SVGA3dShaderConstType;
+
+#define SVGA3D_MAX_SURFACE_FACES 6
+
+typedef enum {
+ SVGA3D_STRETCH_BLT_POINT = 0,
+ SVGA3D_STRETCH_BLT_LINEAR = 1,
+ SVGA3D_STRETCH_BLT_MAX
+} SVGA3dStretchBltMode;
+
+typedef enum {
+ SVGA3D_QUERYTYPE_OCCLUSION = 0,
+ SVGA3D_QUERYTYPE_MAX
+} SVGA3dQueryType;
+
+typedef enum {
+ SVGA3D_QUERYSTATE_PENDING = 0, /* Waiting on the host (set by guest) */
+ SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully (set by host) */
+ SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully (set by host) */
+ SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (For guest use only) */
+} SVGA3dQueryState;
+
+typedef enum {
+ SVGA3D_WRITE_HOST_VRAM = 1,
+ SVGA3D_READ_HOST_VRAM = 2,
+} SVGA3dTransferType;
+
+/*
+ * The maximum number vertex arrays we're guaranteed to support in
+ * SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_VERTEX_ARRAYS 32
+
+/*
+ * Identifiers for commands in the command FIFO.
+ *
+ * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
+ * the SVGA3D protocol and remain reserved; they should not be used in the
+ * future.
+ *
+ * IDs between 1040 and 1999 (inclusive) are available for use by the
+ * current SVGA3D protocol.
+ *
+ * FIFO clients other than SVGA3D should stay below 1000, or at 2000
+ * and up.
+ */
+
+#define SVGA_3D_CMD_LEGACY_BASE 1000
+#define SVGA_3D_CMD_BASE 1040
+
+#define SVGA_3D_CMD_SURFACE_DEFINE SVGA_3D_CMD_BASE + 0
+#define SVGA_3D_CMD_SURFACE_DESTROY SVGA_3D_CMD_BASE + 1
+#define SVGA_3D_CMD_SURFACE_COPY SVGA_3D_CMD_BASE + 2
+#define SVGA_3D_CMD_SURFACE_STRETCHBLT SVGA_3D_CMD_BASE + 3
+#define SVGA_3D_CMD_SURFACE_DMA SVGA_3D_CMD_BASE + 4
+#define SVGA_3D_CMD_CONTEXT_DEFINE SVGA_3D_CMD_BASE + 5
+#define SVGA_3D_CMD_CONTEXT_DESTROY SVGA_3D_CMD_BASE + 6
+#define SVGA_3D_CMD_SETTRANSFORM SVGA_3D_CMD_BASE + 7
+#define SVGA_3D_CMD_SETZRANGE SVGA_3D_CMD_BASE + 8
+#define SVGA_3D_CMD_SETRENDERSTATE SVGA_3D_CMD_BASE + 9
+#define SVGA_3D_CMD_SETRENDERTARGET SVGA_3D_CMD_BASE + 10
+#define SVGA_3D_CMD_SETTEXTURESTATE SVGA_3D_CMD_BASE + 11
+#define SVGA_3D_CMD_SETMATERIAL SVGA_3D_CMD_BASE + 12
+#define SVGA_3D_CMD_SETLIGHTDATA SVGA_3D_CMD_BASE + 13
+#define SVGA_3D_CMD_SETLIGHTENABLED SVGA_3D_CMD_BASE + 14
+#define SVGA_3D_CMD_SETVIEWPORT SVGA_3D_CMD_BASE + 15
+#define SVGA_3D_CMD_SETCLIPPLANE SVGA_3D_CMD_BASE + 16
+#define SVGA_3D_CMD_CLEAR SVGA_3D_CMD_BASE + 17
+#define SVGA_3D_CMD_PRESENT SVGA_3D_CMD_BASE + 18 // Deprecated
+#define SVGA_3D_CMD_SHADER_DEFINE SVGA_3D_CMD_BASE + 19
+#define SVGA_3D_CMD_SHADER_DESTROY SVGA_3D_CMD_BASE + 20
+#define SVGA_3D_CMD_SET_SHADER SVGA_3D_CMD_BASE + 21
+#define SVGA_3D_CMD_SET_SHADER_CONST SVGA_3D_CMD_BASE + 22
+#define SVGA_3D_CMD_DRAW_PRIMITIVES SVGA_3D_CMD_BASE + 23
+#define SVGA_3D_CMD_SETSCISSORRECT SVGA_3D_CMD_BASE + 24
+#define SVGA_3D_CMD_BEGIN_QUERY SVGA_3D_CMD_BASE + 25
+#define SVGA_3D_CMD_END_QUERY SVGA_3D_CMD_BASE + 26
+#define SVGA_3D_CMD_WAIT_FOR_QUERY SVGA_3D_CMD_BASE + 27
+#define SVGA_3D_CMD_PRESENT_READBACK SVGA_3D_CMD_BASE + 28 // Deprecated
+#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29
+#define SVGA_3D_CMD_MAX SVGA_3D_CMD_BASE + 30
+
+#define SVGA_3D_CMD_FUTURE_MAX 2000
+
+/*
+ * Common substructures used in multiple FIFO commands:
+ */
+
+typedef struct {
+ union {
+ struct {
+ uint16 function; // SVGA3dFogFunction
+ uint8 type; // SVGA3dFogType
+ uint8 base; // SVGA3dFogBase
+ };
+ uint32 uintValue;
+ };
+} SVGA3dFogMode;
+
+/*
+ * Uniquely identify one image (a 1D/2D/3D array) from a surface. This
+ * is a surface ID as well as face/mipmap indices.
+ */
+
+typedef
+struct SVGA3dSurfaceImageId {
+ uint32 sid;
+ uint32 face;
+ uint32 mipmap;
+} SVGA3dSurfaceImageId;
+
+typedef
+struct SVGA3dGuestImage {
+ SVGAGuestPtr ptr;
+
+ /*
+ * A note on interpretation of pitch: This value of pitch is the
+ * number of bytes between vertically adjacent image
+ * blocks. Normally this is the number of bytes between the first
+ * pixel of two adjacent scanlines. With compressed textures,
+ * however, this may represent the number of bytes between
+ * compression blocks rather than between rows of pixels.
+ *
+ * XXX: Compressed textures currently must be tightly packed in guest memory.
+ *
+ * If the image is 1-dimensional, pitch is ignored.
+ *
+ * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+ * assuming each row of blocks is tightly packed.
+ */
+ uint32 pitch;
+} SVGA3dGuestImage;
+
+
+/*
+ * FIFO command format definitions:
+ */
+
+/*
+ * The data size header following cmdNum for every 3d command
+ */
+typedef
+struct {
+ uint32 id;
+ uint32 size;
+} SVGA3dCmdHeader;
+
+/*
+ * A surface is a hierarchy of host VRAM surfaces: 1D, 2D, or 3D, with
+ * optional mipmaps and cube faces.
+ */
+
+typedef
+struct {
+ uint32 width;
+ uint32 height;
+ uint32 depth;
+} SVGA3dSize;
+
+typedef enum {
+ SVGA3D_SURFACE_CUBEMAP = (1 << 0),
+ SVGA3D_SURFACE_HINT_STATIC = (1 << 1),
+ SVGA3D_SURFACE_HINT_DYNAMIC = (1 << 2),
+ SVGA3D_SURFACE_HINT_INDEXBUFFER = (1 << 3),
+ SVGA3D_SURFACE_HINT_VERTEXBUFFER = (1 << 4),
+ SVGA3D_SURFACE_HINT_TEXTURE = (1 << 5),
+ SVGA3D_SURFACE_HINT_RENDERTARGET = (1 << 6),
+ SVGA3D_SURFACE_HINT_DEPTHSTENCIL = (1 << 7),
+ SVGA3D_SURFACE_HINT_WRITEONLY = (1 << 8),
+} SVGA3dSurfaceFlags;
+
+typedef
+struct {
+ uint32 numMipLevels;
+} SVGA3dSurfaceFace;
+
+typedef
+struct {
+ uint32 sid;
+ SVGA3dSurfaceFlags surfaceFlags;
+ SVGA3dSurfaceFormat format;
+ SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES];
+ /*
+ * Followed by an SVGA3dSize structure for each mip level in each face.
+ *
+ * A note on surface sizes: Sizes are always specified in pixels,
+ * even if the true surface size is not a multiple of the minimum
+ * block size of the surface's format. For example, a 3x3x1 DXT1
+ * compressed texture would actually be stored as a 4x4x1 image in
+ * memory.
+ */
+} SVGA3dCmdDefineSurface; /* SVGA_3D_CMD_SURFACE_DEFINE */
+
+typedef
+struct {
+ uint32 sid;
+} SVGA3dCmdDestroySurface; /* SVGA_3D_CMD_SURFACE_DESTROY */
+
+typedef
+struct {
+ uint32 cid;
+} SVGA3dCmdDefineContext; /* SVGA_3D_CMD_CONTEXT_DEFINE */
+
+typedef
+struct {
+ uint32 cid;
+} SVGA3dCmdDestroyContext; /* SVGA_3D_CMD_CONTEXT_DESTROY */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dClearFlag clearFlag;
+ uint32 color;
+ float depth;
+ uint32 stencil;
+ /* Followed by variable number of SVGA3dRect structures */
+} SVGA3dCmdClear; /* SVGA_3D_CMD_CLEAR */
+
+typedef
+struct SVGA3dCopyRect {
+ uint32 x;
+ uint32 y;
+ uint32 w;
+ uint32 h;
+ uint32 srcx;
+ uint32 srcy;
+} SVGA3dCopyRect;
+
+typedef
+struct SVGA3dCopyBox {
+ uint32 x;
+ uint32 y;
+ uint32 z;
+ uint32 w;
+ uint32 h;
+ uint32 d;
+ uint32 srcx;
+ uint32 srcy;
+ uint32 srcz;
+} SVGA3dCopyBox;
+
+typedef
+struct {
+ uint32 x;
+ uint32 y;
+ uint32 w;
+ uint32 h;
+} SVGA3dRect;
+
+typedef
+struct {
+ uint32 x;
+ uint32 y;
+ uint32 z;
+ uint32 w;
+ uint32 h;
+ uint32 d;
+} SVGA3dBox;
+
+typedef
+struct {
+ uint32 x;
+ uint32 y;
+ uint32 z;
+} SVGA3dPoint;
+
+typedef
+struct {
+ SVGA3dLightType type;
+ SVGA3dBool inWorldSpace;
+ float diffuse[4];
+ float specular[4];
+ float ambient[4];
+ float position[4];
+ float direction[4];
+ float range;
+ float falloff;
+ float attenuation0;
+ float attenuation1;
+ float attenuation2;
+ float theta;
+ float phi;
+} SVGA3dLightData;
+
+typedef
+struct {
+ uint32 sid;
+ /* Followed by variable number of SVGA3dCopyRect structures */
+} SVGA3dCmdPresent; /* SVGA_3D_CMD_PRESENT */
+
+typedef
+struct {
+ SVGA3dRenderStateName state;
+ union {
+ uint32 uintValue;
+ float floatValue;
+ };
+} SVGA3dRenderState;
+
+typedef
+struct {
+ uint32 cid;
+ /* Followed by variable number of SVGA3dRenderState structures */
+} SVGA3dCmdSetRenderState; /* SVGA_3D_CMD_SETRENDERSTATE */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dRenderTargetType type;
+ SVGA3dSurfaceImageId target;
+} SVGA3dCmdSetRenderTarget; /* SVGA_3D_CMD_SETRENDERTARGET */
+
+typedef
+struct {
+ SVGA3dSurfaceImageId src;
+ SVGA3dSurfaceImageId dest;
+ /* Followed by variable number of SVGA3dCopyBox structures */
+} SVGA3dCmdSurfaceCopy; /* SVGA_3D_CMD_SURFACE_COPY */
+
+typedef
+struct {
+ SVGA3dSurfaceImageId src;
+ SVGA3dSurfaceImageId dest;
+ SVGA3dBox boxSrc;
+ SVGA3dBox boxDest;
+ SVGA3dStretchBltMode mode;
+} SVGA3dCmdSurfaceStretchBlt; /* SVGA_3D_CMD_SURFACE_STRETCHBLT */
+
+typedef
+struct {
+ /*
+ * If the discard flag is present in a surface DMA operation, the host may
+ * discard the contents of the current mipmap level and face of the target
+ * surface before applying the surface DMA contents.
+ */
+ uint32 discard : 1;
+
+ /*
+ * If the unsynchronized flag is present, the host may perform this upload
+ * without syncing to pending reads on this surface.
+ */
+ uint32 unsynchronized : 1;
+
+ /*
+ * Guests *MUST* set the reserved bits to 0 before submitting the command
+ * suffix as future flags may occupy these bits.
+ */
+ uint32 reserved : 30;
+} SVGA3dSurfaceDMAFlags;
+
+typedef
+struct {
+ SVGA3dGuestImage guest;
+ SVGA3dSurfaceImageId host;
+ SVGA3dTransferType transfer;
+ /*
+ * Followed by variable number of SVGA3dCopyBox structures. For consistency
+ * in all clipping logic and coordinate translation, we define the
+ * "source" in each copyBox as the guest image and the
+ * "destination" as the host image, regardless of transfer
+ * direction.
+ *
+ * For efficiency, the SVGA3D device is free to copy more data than
+ * specified. For example, it may round copy boxes outwards such
+ * that they lie on particular alignment boundaries.
+ */
+} SVGA3dCmdSurfaceDMA; /* SVGA_3D_CMD_SURFACE_DMA */
+
+/*
+ * SVGA3dCmdSurfaceDMASuffix --
+ *
+ * This is a command suffix that will appear after a SurfaceDMA command in
+ * the FIFO. It contains some extra information that hosts may use to
+ * optimize performance or protect the guest. This suffix exists to preserve
+ * backwards compatibility while also allowing for new functionality to be
+ * implemented.
+ */
+
+typedef
+struct {
+ uint32 suffixSize;
+
+ /*
+ * The maximum offset is used to determine the maximum offset from the
+ * guestPtr base address that will be accessed or written to during this
+ * surfaceDMA. If the suffix is supported, the host will respect this
+ * boundary while performing surface DMAs.
+ *
+ * Defaults to MAX_UINT32
+ */
+ uint32 maximumOffset;
+
+ /*
+ * A set of flags that describes optimizations that the host may perform
+ * while performing this surface DMA operation. The guest should never rely
+ * on behaviour that is different when these flags are set for correctness.
+ *
+ * Defaults to 0
+ */
+ SVGA3dSurfaceDMAFlags flags;
+} SVGA3dCmdSurfaceDMASuffix;
+
+/*
+ * SVGA_3D_CMD_DRAW_PRIMITIVES --
+ *
+ * This command is the SVGA3D device's generic drawing entry point.
+ * It can draw multiple ranges of primitives, optionally using an
+ * index buffer, using an arbitrary collection of vertex buffers.
+ *
+ * Each SVGA3dVertexDecl defines a distinct vertex array to bind
+ * during this draw call. The declarations specify which surface
+ * the vertex data lives in, what that vertex data is used for,
+ * and how to interpret it.
+ *
+ * Each SVGA3dPrimitiveRange defines a collection of primitives
+ * to render using the same vertex arrays. An index buffer is
+ * optional.
+ */
+
+typedef
+struct {
+ /*
+ * A range hint is an optional specification for the range of indices
+ * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed
+ * that the entire array will be used.
+ *
+ * These are only hints. The SVGA3D device may use them for
+ * performance optimization if possible, but it's also allowed to
+ * ignore these values.
+ */
+ uint32 first;
+ uint32 last;
+} SVGA3dArrayRangeHint;
+
+typedef
+struct {
+ /*
+ * Define the origin and shape of a vertex or index array. Both
+ * 'offset' and 'stride' are in bytes. The provided surface will be
+ * reinterpreted as a flat array of bytes in the same format used
+ * by surface DMA operations. To avoid unnecessary conversions, the
+ * surface should be created with the SVGA3D_BUFFER format.
+ *
+ * Index 0 in the array starts 'offset' bytes into the surface.
+ * Index 1 begins at byte 'offset + stride', etc. Array indices may
+ * not be negative.
+ */
+ uint32 surfaceId;
+ uint32 offset;
+ uint32 stride;
+} SVGA3dArray;
+
+typedef
+struct {
+ /*
+ * Describe a vertex array's data type, and define how it is to be
+ * used by the fixed function pipeline or the vertex shader. It
+ * isn't useful to have two VertexDecls with the same
+ * VertexArrayIdentity in one draw call.
+ */
+ SVGA3dDeclType type;
+ SVGA3dDeclMethod method;
+ SVGA3dDeclUsage usage;
+ uint32 usageIndex;
+} SVGA3dVertexArrayIdentity;
+
+typedef
+struct {
+ SVGA3dVertexArrayIdentity identity;
+ SVGA3dArray array;
+ SVGA3dArrayRangeHint rangeHint;
+} SVGA3dVertexDecl;
+
+typedef
+struct {
+ /*
+ * Define a group of primitives to render, from sequential indices.
+ *
+ * The value of 'primitiveType' and 'primitiveCount' imply the
+ * total number of vertices that will be rendered.
+ */
+ SVGA3dPrimitiveType primType;
+ uint32 primitiveCount;
+
+ /*
+ * Optional index buffer. If indexArray.surfaceId is
+ * SVGA3D_INVALID_ID, we render without an index buffer. Rendering
+ * without an index buffer is identical to rendering with an index
+ * buffer containing the sequence [0, 1, 2, 3, ...].
+ *
+ * If an index buffer is in use, indexWidth specifies the width in
+ * bytes of each index value. It must be less than or equal to
+ * indexArray.stride.
+ *
+ * (Currently, the SVGA3D device requires index buffers to be tightly
+ * packed. In other words, indexWidth == indexArray.stride)
+ */
+ SVGA3dArray indexArray;
+ uint32 indexWidth;
+
+ /*
+ * Optional index bias. This number is added to all indices from
+ * indexArray before they are used as vertex array indices. This
+ * can be used in multiple ways:
+ *
+ * - When not using an indexArray, this bias can be used to
+ * specify where in the vertex arrays to begin rendering.
+ *
+ * - A positive number here is equivalent to increasing the
+ * offset in each vertex array.
+ *
+ * - A negative number can be used to render using a small
+ * vertex array and an index buffer that contains large
+ * values. This may be used by some applications that
+ * crop a vertex buffer without modifying their index
+ * buffer.
+ *
+ * Note that rendering with a negative bias value may be slower and
+ * use more memory than rendering with a positive or zero bias.
+ */
+ int32 indexBias;
+} SVGA3dPrimitiveRange;
+
+typedef
+struct {
+ uint32 cid;
+ uint32 numVertexDecls;
+ uint32 numRanges;
+
+ /*
+ * There are two variable size arrays after the
+ * SVGA3dCmdDrawPrimitives structure. In order,
+ * they are:
+ *
+ * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
+ * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+ * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
+ * the frequency divisor for this the corresponding vertex decl)
+ */
+} SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */
+
+typedef
+struct {
+ uint32 stage;
+ SVGA3dTextureStateName name;
+ union {
+ uint32 value;
+ float floatValue;
+ };
+} SVGA3dTextureState;
+
+typedef
+struct {
+ uint32 cid;
+ /* Followed by variable number of SVGA3dTextureState structures */
+} SVGA3dCmdSetTextureState; /* SVGA_3D_CMD_SETTEXTURESTATE */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dTransformType type;
+ float matrix[16];
+} SVGA3dCmdSetTransform; /* SVGA_3D_CMD_SETTRANSFORM */
+
+typedef
+struct {
+ float min;
+ float max;
+} SVGA3dZRange;
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dZRange zRange;
+} SVGA3dCmdSetZRange; /* SVGA_3D_CMD_SETZRANGE */
+
+typedef
+struct {
+ float diffuse[4];
+ float ambient[4];
+ float specular[4];
+ float emissive[4];
+ float shininess;
+} SVGA3dMaterial;
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dFace face;
+ SVGA3dMaterial material;
+} SVGA3dCmdSetMaterial; /* SVGA_3D_CMD_SETMATERIAL */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 index;
+ SVGA3dLightData data;
+} SVGA3dCmdSetLightData; /* SVGA_3D_CMD_SETLIGHTDATA */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 index;
+ uint32 enabled;
+} SVGA3dCmdSetLightEnabled; /* SVGA_3D_CMD_SETLIGHTENABLED */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dRect rect;
+} SVGA3dCmdSetViewport; /* SVGA_3D_CMD_SETVIEWPORT */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dRect rect;
+} SVGA3dCmdSetScissorRect; /* SVGA_3D_CMD_SETSCISSORRECT */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 index;
+ float plane[4];
+} SVGA3dCmdSetClipPlane; /* SVGA_3D_CMD_SETCLIPPLANE */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 shid;
+ SVGA3dShaderType type;
+ /* Followed by variable number of DWORDs for shader bycode */
+} SVGA3dCmdDefineShader; /* SVGA_3D_CMD_SHADER_DEFINE */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 shid;
+ SVGA3dShaderType type;
+} SVGA3dCmdDestroyShader; /* SVGA_3D_CMD_SHADER_DESTROY */
+
+typedef
+struct {
+ uint32 cid;
+ uint32 reg; /* register number */
+ SVGA3dShaderType type;
+ SVGA3dShaderConstType ctype;
+ uint32 values[4];
+} SVGA3dCmdSetShaderConst; /* SVGA_3D_CMD_SET_SHADER_CONST */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dShaderType type;
+ uint32 shid;
+} SVGA3dCmdSetShader; /* SVGA_3D_CMD_SET_SHADER */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dQueryType type;
+} SVGA3dCmdBeginQuery; /* SVGA_3D_CMD_BEGIN_QUERY */
+
+typedef
+struct {
+ uint32 cid;
+ SVGA3dQueryType type;
+ SVGAGuestPtr guestResult; /* Points to an SVGA3dQueryResult structure */
+} SVGA3dCmdEndQuery; /* SVGA_3D_CMD_END_QUERY */
+
+typedef
+struct {
+ uint32 cid; /* Same parameters passed to END_QUERY */
+ SVGA3dQueryType type;
+ SVGAGuestPtr guestResult;
+} SVGA3dCmdWaitForQuery; /* SVGA_3D_CMD_WAIT_FOR_QUERY */
+
+typedef
+struct {
+ uint32 totalSize; /* Set by guest before query is ended. */
+ SVGA3dQueryState state; /* Set by host or guest. See SVGA3dQueryState. */
+ union { /* Set by host on exit from PENDING state */
+ uint32 result32;
+ };
+} SVGA3dQueryResult;
+
+/*
+ * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN --
+ *
+ * This is a blit from an SVGA3D surface to a Screen Object. Just
+ * like GMR-to-screen blits, this blit may be directed at a
+ * specific screen or to the virtual coordinate space.
+ *
+ * The blit copies from a rectangular region of an SVGA3D surface
+ * image to a rectangular region of a screen or screens.
+ *
+ * This command takes an optional variable-length list of clipping
+ * rectangles after the body of the command. If no rectangles are
+ * specified, there is no clipping region. The entire destRect is
+ * drawn to. If one or more rectangles are included, they describe
+ * a clipping region. The clip rectangle coordinates are measured
+ * relative to the top-left corner of destRect.
+ *
+ * This clipping region serves multiple purposes:
+ *
+ * - It can be used to perform an irregularly shaped blit more
+ * efficiently than by issuing many separate blit commands.
+ *
+ * - It is equivalent to allowing blits with non-integer
+ * source coordinates. You could blit just one half-pixel
+ * of a source, for example, by specifying a larger
+ * destination rectangle than you need, then removing
+ * part of it using a clip rectangle.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ *
+ * Limitations:
+ *
+ * - Currently, no backend supports blits from a mipmap or face
+ * other than the first one.
+ */
+
+typedef
+struct {
+ SVGA3dSurfaceImageId srcImage;
+ SVGASignedRect srcRect;
+ uint32 destScreenId; /* Screen ID or SVGA_ID_INVALID for virt. coords */
+ SVGASignedRect destRect; /* Supports scaling if src/rest different size */
+ /* Clipping: zero or more SVGASignedRects follow */
+} SVGA3dCmdBlitSurfaceToScreen; /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
+
+
+/*
+ * Capability query index.
+ *
+ * Notes:
+ *
+ * 1. SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of
+ * fixed-function texture units available. Each of these units
+ * work in both FFP and Shader modes, and they support texture
+ * transforms and texture coordinates. The host may have additional
+ * texture image units that are only usable with shaders.
+ *
+ * 2. The BUFFER_FORMAT capabilities are deprecated, and they always
+ * return TRUE. Even on physical hardware that does not support
+ * these formats natively, the SVGA3D device will provide an emulation
+ * which should be invisible to the guest OS.
+ *
+ * In general, the SVGA3D device should support any operation on
+ * any surface format, it just may perform some of these
+ * operations in software depending on the capabilities of the
+ * available physical hardware.
+ *
+ * XXX: In the future, we will add capabilities that describe in
+ * detail what formats are supported in hardware for what kinds
+ * of operations.
+ */
+
+typedef enum {
+ SVGA3D_DEVCAP_3D = 0,
+ SVGA3D_DEVCAP_MAX_LIGHTS = 1,
+ SVGA3D_DEVCAP_MAX_TEXTURES = 2, /* See note (1) */
+ SVGA3D_DEVCAP_MAX_CLIP_PLANES = 3,
+ SVGA3D_DEVCAP_VERTEX_SHADER_VERSION = 4,
+ SVGA3D_DEVCAP_VERTEX_SHADER = 5,
+ SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION = 6,
+ SVGA3D_DEVCAP_FRAGMENT_SHADER = 7,
+ SVGA3D_DEVCAP_MAX_RENDER_TARGETS = 8,
+ SVGA3D_DEVCAP_S23E8_TEXTURES = 9,
+ SVGA3D_DEVCAP_S10E5_TEXTURES = 10,
+ SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND = 11,
+ SVGA3D_DEVCAP_D16_BUFFER_FORMAT = 12, /* See note (2) */
+ SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT = 13, /* See note (2) */
+ SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT = 14, /* See note (2) */
+ SVGA3D_DEVCAP_QUERY_TYPES = 15,
+ SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING = 16,
+ SVGA3D_DEVCAP_MAX_POINT_SIZE = 17,
+ SVGA3D_DEVCAP_MAX_SHADER_TEXTURES = 18,
+ SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH = 19,
+ SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT = 20,
+ SVGA3D_DEVCAP_MAX_VOLUME_EXTENT = 21,
+ SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT = 22,
+ SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO = 23,
+ SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY = 24,
+ SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT = 25,
+ SVGA3D_DEVCAP_MAX_VERTEX_INDEX = 26,
+ SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS = 27,
+ SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS = 28,
+ SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS = 29,
+ SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS = 30,
+ SVGA3D_DEVCAP_TEXTURE_OPS = 31,
+ SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8 = 32,
+ SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8 = 33,
+ SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10 = 34,
+ SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5 = 35,
+ SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5 = 36,
+ SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4 = 37,
+ SVGA3D_DEVCAP_SURFACEFMT_R5G6B5 = 38,
+ SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16 = 39,
+ SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8 = 40,
+ SVGA3D_DEVCAP_SURFACEFMT_ALPHA8 = 41,
+ SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8 = 42,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_D16 = 43,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8 = 44,
+ SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8 = 45,
+ SVGA3D_DEVCAP_SURFACEFMT_DXT1 = 46,
+ SVGA3D_DEVCAP_SURFACEFMT_DXT2 = 47,
+ SVGA3D_DEVCAP_SURFACEFMT_DXT3 = 48,
+ SVGA3D_DEVCAP_SURFACEFMT_DXT4 = 49,
+ SVGA3D_DEVCAP_SURFACEFMT_DXT5 = 50,
+ SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8 = 51,
+ SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10 = 52,
+ SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8 = 53,
+ SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8 = 54,
+ SVGA3D_DEVCAP_SURFACEFMT_CxV8U8 = 55,
+ SVGA3D_DEVCAP_SURFACEFMT_R_S10E5 = 56,
+ SVGA3D_DEVCAP_SURFACEFMT_R_S23E8 = 57,
+ SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5 = 58,
+ SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8 = 59,
+ SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5 = 60,
+ SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8 = 61,
+ SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES = 63,
+
+ /*
+ * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color
+ * render targets. This does no include the depth or stencil targets.
+ */
+ SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS = 64,
+
+ SVGA3D_DEVCAP_SURFACEFMT_V16U16 = 65,
+ SVGA3D_DEVCAP_SURFACEFMT_G16R16 = 66,
+ SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16 = 67,
+ SVGA3D_DEVCAP_SURFACEFMT_UYVY = 68,
+ SVGA3D_DEVCAP_SURFACEFMT_YUY2 = 69,
+
+ /*
+ * Don't add new caps into the previous section; the values in this
+ * enumeration must not change. You can put new values right before
+ * SVGA3D_DEVCAP_MAX.
+ */
+ SVGA3D_DEVCAP_MAX /* This must be the last index. */
+} SVGA3dDevCapIndex;
+
+typedef union {
+ Bool b;
+ uint32 u;
+ int32 i;
+ float f;
+} SVGA3dDevCapResult;
+
+#endif /* _SVGA3D_REG_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
new file mode 100644
index 00000000000..2078c4a8a44
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -0,0 +1,519 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_shaderdefs.h --
+ *
+ * SVGA3D byte code format and limit definitions.
+ *
+ * The format of the byte code directly corresponds to that defined
+ * by Microsoft DirectX SDK 9.0c (file d3d9types.h). The format can
+ * also be extended so that different shader formats can be supported
+ * for example GLSL, ARB vp/fp, NV/ATI shader formats, etc.
+ *
+ */
+
+#ifndef __SVGA3D_SHADER_DEFS__
+#define __SVGA3D_SHADER_DEFS__
+
+/* SVGA3D shader hardware limits. */
+
+#define SVGA3D_INPUTREG_MAX 16
+#define SVGA3D_OUTPUTREG_MAX 12
+#define SVGA3D_VERTEX_SAMPLERREG_MAX 4
+#define SVGA3D_PIXEL_SAMPLERREG_MAX 16
+#define SVGA3D_SAMPLERREG_MAX (SVGA3D_PIXEL_SAMPLERREG_MAX+\
+ SVGA3D_VERTEX_SAMPLERREG_MAX)
+#define SVGA3D_TEMPREG_MAX 32
+#define SVGA3D_CONSTREG_MAX 256
+#define SVGA3D_CONSTINTREG_MAX 16
+#define SVGA3D_CONSTBOOLREG_MAX 16
+#define SVGA3D_ADDRREG_MAX 1
+#define SVGA3D_PREDREG_MAX 1
+
+/* SVGA3D byte code specific limits */
+
+#define SVGA3D_MAX_SRC_REGS 4
+#define SVGA3D_MAX_NESTING_LEVEL 32
+
+/* SVGA3D version information. */
+
+#define SVGA3D_VS_TYPE 0xFFFE
+#define SVGA3D_PS_TYPE 0xFFFF
+
+typedef struct {
+ union {
+ struct {
+ uint32 minor : 8;
+ uint32 major : 8;
+ uint32 type : 16;
+ };
+
+ uint32 value;
+ };
+} SVGA3dShaderVersion;
+
+#define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8)
+#define SVGA3D_VS_11 (SVGA3D_VS_10 | 1)
+#define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8)
+#define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8)
+
+#define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8)
+#define SVGA3D_PS_11 (SVGA3D_PS_10 | 1)
+#define SVGA3D_PS_12 (SVGA3D_PS_10 | 2)
+#define SVGA3D_PS_13 (SVGA3D_PS_10 | 3)
+#define SVGA3D_PS_14 (SVGA3D_PS_10 | 4)
+#define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8)
+#define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8)
+
+/* The *_ENABLED are for backwards compatibility with old drivers */
+typedef enum {
+ SVGA3DPSVERSION_NONE = 0,
+ SVGA3DPSVERSION_ENABLED = 1,
+ SVGA3DPSVERSION_11 = 3,
+ SVGA3DPSVERSION_12 = 5,
+ SVGA3DPSVERSION_13 = 7,
+ SVGA3DPSVERSION_14 = 9,
+ SVGA3DPSVERSION_20 = 11,
+ SVGA3DPSVERSION_30 = 13,
+ SVGA3DPSVERSION_40 = 15,
+ SVGA3DPSVERSION_MAX
+} SVGA3dPixelShaderVersion;
+
+typedef enum {
+ SVGA3DVSVERSION_NONE = 0,
+ SVGA3DVSVERSION_ENABLED = 1,
+ SVGA3DVSVERSION_11 = 3,
+ SVGA3DVSVERSION_20 = 5,
+ SVGA3DVSVERSION_30 = 7,
+ SVGA3DVSVERSION_40 = 9,
+ SVGA3DVSVERSION_MAX
+} SVGA3dVertexShaderVersion;
+
+/* SVGA3D instruction op codes. */
+
+typedef enum {
+ SVGA3DOP_NOP = 0,
+ SVGA3DOP_MOV,
+ SVGA3DOP_ADD,
+ SVGA3DOP_SUB,
+ SVGA3DOP_MAD,
+ SVGA3DOP_MUL,
+ SVGA3DOP_RCP,
+ SVGA3DOP_RSQ,
+ SVGA3DOP_DP3,
+ SVGA3DOP_DP4,
+ SVGA3DOP_MIN,
+ SVGA3DOP_MAX,
+ SVGA3DOP_SLT,
+ SVGA3DOP_SGE,
+ SVGA3DOP_EXP,
+ SVGA3DOP_LOG,
+ SVGA3DOP_LIT,
+ SVGA3DOP_DST,
+ SVGA3DOP_LRP,
+ SVGA3DOP_FRC,
+ SVGA3DOP_M4x4,
+ SVGA3DOP_M4x3,
+ SVGA3DOP_M3x4,
+ SVGA3DOP_M3x3,
+ SVGA3DOP_M3x2,
+ SVGA3DOP_CALL,
+ SVGA3DOP_CALLNZ,
+ SVGA3DOP_LOOP,
+ SVGA3DOP_RET,
+ SVGA3DOP_ENDLOOP,
+ SVGA3DOP_LABEL,
+ SVGA3DOP_DCL,
+ SVGA3DOP_POW,
+ SVGA3DOP_CRS,
+ SVGA3DOP_SGN,
+ SVGA3DOP_ABS,
+ SVGA3DOP_NRM,
+ SVGA3DOP_SINCOS,
+ SVGA3DOP_REP,
+ SVGA3DOP_ENDREP,
+ SVGA3DOP_IF,
+ SVGA3DOP_IFC,
+ SVGA3DOP_ELSE,
+ SVGA3DOP_ENDIF,
+ SVGA3DOP_BREAK,
+ SVGA3DOP_BREAKC,
+ SVGA3DOP_MOVA,
+ SVGA3DOP_DEFB,
+ SVGA3DOP_DEFI,
+ SVGA3DOP_TEXCOORD = 64,
+ SVGA3DOP_TEXKILL,
+ SVGA3DOP_TEX,
+ SVGA3DOP_TEXBEM,
+ SVGA3DOP_TEXBEML,
+ SVGA3DOP_TEXREG2AR,
+ SVGA3DOP_TEXREG2GB = 70,
+ SVGA3DOP_TEXM3x2PAD,
+ SVGA3DOP_TEXM3x2TEX,
+ SVGA3DOP_TEXM3x3PAD,
+ SVGA3DOP_TEXM3x3TEX,
+ SVGA3DOP_RESERVED0,
+ SVGA3DOP_TEXM3x3SPEC,
+ SVGA3DOP_TEXM3x3VSPEC,
+ SVGA3DOP_EXPP,
+ SVGA3DOP_LOGP,
+ SVGA3DOP_CND = 80,
+ SVGA3DOP_DEF,
+ SVGA3DOP_TEXREG2RGB,
+ SVGA3DOP_TEXDP3TEX,
+ SVGA3DOP_TEXM3x2DEPTH,
+ SVGA3DOP_TEXDP3,
+ SVGA3DOP_TEXM3x3,
+ SVGA3DOP_TEXDEPTH,
+ SVGA3DOP_CMP,
+ SVGA3DOP_BEM,
+ SVGA3DOP_DP2ADD = 90,
+ SVGA3DOP_DSX,
+ SVGA3DOP_DSY,
+ SVGA3DOP_TEXLDD,
+ SVGA3DOP_SETP,
+ SVGA3DOP_TEXLDL,
+ SVGA3DOP_BREAKP = 96,
+ SVGA3DOP_LAST_INST,
+ SVGA3DOP_PHASE = 0xFFFD,
+ SVGA3DOP_COMMENT = 0xFFFE,
+ SVGA3DOP_END = 0xFFFF,
+} SVGA3dShaderOpCodeType;
+
+/* SVGA3D operation control/comparison function types */
+
+typedef enum {
+ SVGA3DOPCONT_NONE,
+ SVGA3DOPCONT_PROJECT, /* Projective texturing */
+ SVGA3DOPCONT_BIAS, /* Texturing with a LOD bias */
+} SVGA3dShaderOpCodeControlFnType;
+
+typedef enum {
+ SVGA3DOPCOMP_RESERVED0 = 0,
+ SVGA3DOPCOMP_GT,
+ SVGA3DOPCOMP_EQ,
+ SVGA3DOPCOMP_GE,
+ SVGA3DOPCOMP_LT,
+ SVGA3DOPCOMPC_NE,
+ SVGA3DOPCOMP_LE,
+ SVGA3DOPCOMP_RESERVED1
+} SVGA3dShaderOpCodeCompFnType;
+
+/* SVGA3D register types */
+
+typedef enum {
+ SVGA3DREG_TEMP = 0, /* Temporary register file */
+ SVGA3DREG_INPUT, /* Input register file */
+ SVGA3DREG_CONST, /* Constant register file */
+ SVGA3DREG_ADDR, /* Address register for VS */
+ SVGA3DREG_TEXTURE = 3, /* Texture register file for PS */
+ SVGA3DREG_RASTOUT, /* Rasterizer register file */
+ SVGA3DREG_ATTROUT, /* Attribute output register file */
+ SVGA3DREG_TEXCRDOUT, /* Texture coordinate output register file */
+ SVGA3DREG_OUTPUT = 6, /* Output register file for VS 3.0+ */
+ SVGA3DREG_CONSTINT, /* Constant integer vector register file */
+ SVGA3DREG_COLOROUT, /* Color output register file */
+ SVGA3DREG_DEPTHOUT, /* Depth output register file */
+ SVGA3DREG_SAMPLER, /* Sampler state register file */
+ SVGA3DREG_CONST2, /* Constant register file 2048 - 4095 */
+ SVGA3DREG_CONST3, /* Constant register file 4096 - 6143 */
+ SVGA3DREG_CONST4, /* Constant register file 6144 - 8191 */
+ SVGA3DREG_CONSTBOOL, /* Constant boolean register file */
+ SVGA3DREG_LOOP, /* Loop counter register file */
+ SVGA3DREG_TEMPFLOAT16, /* 16-bit float temp register file */
+ SVGA3DREG_MISCTYPE, /* Miscellaneous (single) registers */
+ SVGA3DREG_LABEL, /* Label */
+ SVGA3DREG_PREDICATE, /* Predicate register */
+} SVGA3dShaderRegType;
+
+/* SVGA3D rasterizer output register types */
+
+typedef enum {
+ SVGA3DRASTOUT_POSITION = 0,
+ SVGA3DRASTOUT_FOG,
+ SVGA3DRASTOUT_PSIZE
+} SVGA3dShaderRastOutRegType;
+
+/* SVGA3D miscellaneous register types */
+
+typedef enum {
+ SVGA3DMISCREG_POSITION = 0, /* Input position x,y,z,rhw (PS) */
+ SVGA3DMISCREG_FACE /* Floating point primitive area (PS) */
+} SVGA3DShaderMiscRegType;
+
+/* SVGA3D sampler types */
+
+typedef enum {
+ SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */
+ SVGA3DSAMP_2D = 2, /* dcl_2d s# (for declaring a 2-D texture) */
+ SVGA3DSAMP_CUBE, /* dcl_cube s# (for declaring a cube texture) */
+ SVGA3DSAMP_VOLUME, /* dcl_volume s# (for declaring a volume texture) */
+} SVGA3dShaderSamplerType;
+
+/* SVGA3D sampler format classes */
+
+typedef enum {
+ SVGA3DSAMPFORMAT_ARGB, /* ARGB formats */
+ SVGA3DSAMPFORMAT_V8U8, /* Sign and normalize (SNORM) V & U */
+ SVGA3DSAMPFORMAT_Q8W8V8U8, /* SNORM all */
+ SVGA3DSAMPFORMAT_CxV8U8, /* SNORM V & U, C=SQRT(1-U^2-V^2) */
+ SVGA3DSAMPFORMAT_X8L8V8U8, /* SNORM V & U */
+ SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */
+ SVGA3DSAMPFORMAT_DXT_PMA, /* DXT pre-multiplied alpha */
+ SVGA3DSAMPFORMAT_YUV, /* YUV video format */
+ SVGA3DSAMPFORMAT_UYVY, /* UYVY video format */
+ SVGA3DSAMPFORMAT_Rx, /* R16F/32F */
+ SVGA3DSAMPFORMAT_RxGx, /* R16FG16F, R32FG32F */
+ SVGA3DSAMPFORMAT_V16U16, /* SNORM all */
+} SVGA3DShaderSamplerFormatClass;
+
+/* SVGA3D write mask */
+
+#define SVGA3DWRITEMASK_0 1 /* Component 0 (X;Red) */
+#define SVGA3DWRITEMASK_1 2 /* Component 1 (Y;Green) */
+#define SVGA3DWRITEMASK_2 4 /* Component 2 (Z;Blue) */
+#define SVGA3DWRITEMASK_3 8 /* Component 3 (W;Alpha) */
+#define SVGA3DWRITEMASK_ALL 15 /* All components */
+
+/* SVGA3D destination modifiers */
+
+#define SVGA3DDSTMOD_NONE 0 /* nop */
+#define SVGA3DDSTMOD_SATURATE 1 /* clamp to [0, 1] */
+#define SVGA3DDSTMOD_PARTIALPRECISION 2 /* Partial precision hint */
+
+/*
+ * Relevant to multisampling only:
+ * When the pixel center is not covered, sample
+ * attribute or compute gradients/LOD
+ * using multisample "centroid" location.
+ * "Centroid" is some location within the covered
+ * region of the pixel.
+ */
+
+#define SVGA3DDSTMOD_MSAMPCENTROID 4
+
+/* SVGA3D source swizzle */
+
+#define SVGA3DSWIZZLE_REPLICATEX 0x00
+#define SVGA3DSWIZZLE_REPLICATEY 0x55
+#define SVGA3DSWIZZLE_REPLICATEZ 0xAA
+#define SVGA3DSWIZZLE_REPLICATEW 0xFF
+#define SVGA3DSWIZZLE_NONE 0xE4
+#define SVGA3DSWIZZLE_YZXW 0xC9
+#define SVGA3DSWIZZLE_ZXYW 0xD2
+#define SVGA3DSWIZZLE_WXYZ 0x1B
+
+/* SVGA3D source modifiers */
+
+typedef enum {
+ SVGA3DSRCMOD_NONE = 0, /* nop */
+ SVGA3DSRCMOD_NEG, /* negate */
+ SVGA3DSRCMOD_BIAS, /* bias */
+ SVGA3DSRCMOD_BIASNEG, /* bias and negate */
+ SVGA3DSRCMOD_SIGN, /* sign */
+ SVGA3DSRCMOD_SIGNNEG, /* sign and negate */
+ SVGA3DSRCMOD_COMP, /* complement */
+ SVGA3DSRCMOD_X2, /* x2 */
+ SVGA3DSRCMOD_X2NEG, /* x2 and negate */
+ SVGA3DSRCMOD_DZ, /* divide through by z component */
+ SVGA3DSRCMOD_DW, /* divide through by w component */
+ SVGA3DSRCMOD_ABS, /* abs() */
+ SVGA3DSRCMOD_ABSNEG, /* -abs() */
+ SVGA3DSRCMOD_NOT, /* ! (for predicate register) */
+} SVGA3dShaderSrcModType;
+
+/* SVGA3D instruction token */
+
+typedef struct {
+ union {
+ struct {
+ uint32 comment_op : 16;
+ uint32 comment_size : 16;
+ };
+
+ struct {
+ uint32 op : 16;
+ uint32 control : 3;
+ uint32 reserved2 : 5;
+ uint32 size : 4;
+ uint32 predicated : 1;
+ uint32 reserved1 : 1;
+ uint32 coissue : 1;
+ uint32 reserved0 : 1;
+ };
+
+ uint32 value;
+ };
+} SVGA3dShaderInstToken;
+
+/* SVGA3D destination parameter token */
+
+typedef struct {
+ union {
+ struct {
+ uint32 num : 11;
+ uint32 type_upper : 2;
+ uint32 relAddr : 1;
+ uint32 reserved1 : 2;
+ uint32 mask : 4;
+ uint32 dstMod : 4;
+ uint32 shfScale : 4;
+ uint32 type_lower : 3;
+ uint32 reserved0 : 1;
+ };
+
+ uint32 value;
+ };
+} SVGA3dShaderDestToken;
+
+/* SVGA3D source parameter token */
+
+typedef struct {
+ union {
+ struct {
+ uint32 num : 11;
+ uint32 type_upper : 2;
+ uint32 relAddr : 1;
+ uint32 reserved1 : 2;
+ uint32 swizzle : 8;
+ uint32 srcMod : 4;
+ uint32 type_lower : 3;
+ uint32 reserved0 : 1;
+ };
+
+ uint32 value;
+ };
+} SVGA3dShaderSrcToken;
+
+/* SVGA3DOP_DCL parameter tokens */
+
+typedef struct {
+ union {
+ struct {
+ union {
+ struct {
+ uint32 usage : 5;
+ uint32 reserved1 : 11;
+ uint32 index : 4;
+ uint32 reserved0 : 12;
+ }; /* input / output declaration */
+
+ struct {
+ uint32 reserved3 : 27;
+ uint32 type : 4;
+ uint32 reserved2 : 1;
+ }; /* sampler declaration */
+ };
+
+ SVGA3dShaderDestToken dst;
+ };
+
+ uint32 values[2];
+ };
+} SVGA3DOpDclArgs;
+
+/* SVGA3DOP_DEF parameter tokens */
+
+typedef struct {
+ union {
+ struct {
+ SVGA3dShaderDestToken dst;
+
+ union {
+ float constValues[4];
+ int constIValues[4];
+ Bool constBValue;
+ };
+ };
+
+ uint32 values[5];
+ };
+} SVGA3DOpDefArgs;
+
+/* SVGA3D shader token */
+
+typedef union {
+ uint32 value;
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dest;
+ SVGA3dShaderSrcToken src;
+} SVGA3dShaderToken;
+
+/* SVGA3D shader program */
+
+typedef struct {
+ SVGA3dShaderVersion version;
+ /* SVGA3dShaderToken stream */
+} SVGA3dShaderProgram;
+
+/* SVGA3D version specific register assignments */
+
+static const uint32 SVGA3D_INPUT_REG_POSITION_VS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_PSIZE_VS11 = 1;
+static const uint32 SVGA3D_INPUT_REG_FOG_VS11 = 3;
+static const uint32 SVGA3D_INPUT_REG_FOG_MASK_VS11 = SVGA3DWRITEMASK_3;
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_VS11 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_VS11 = 4;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS11 = 2;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_PS11 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_PS11 = 1;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS20 = 0;
+static const uint32 SVGA3D_INPUT_REG_COLOR_NUM_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_NUM_PS20 = 8;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_BASE_PS20 = 1;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_NUM_PS20 = 4;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_BASE_PS20 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1;
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dShaderGetRegType --
+ *
+ * As the register type is split into two non sequential fields,
+ * this function provides an useful way of accessing the actual
+ * register type without having to manually concatenate the
+ * type_upper and type_lower fields.
+ *
+ * Results:
+ * Returns the register type.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dShaderRegType
+SVGA3dShaderGetRegType(uint32 token)
+{
+ SVGA3dShaderSrcToken src;
+ src.value = token;
+ return (SVGA3dShaderRegType)(src.type_upper << 3 | src.type_lower);
+}
+
+#endif /* __SVGA3D_SHADER_DEFS__ */
diff --git a/src/gallium/drivers/svga/include/svga_escape.h b/src/gallium/drivers/svga/include/svga_escape.h
new file mode 100644
index 00000000000..7b85e9b8c85
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_escape.h
@@ -0,0 +1,89 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_escape.h --
+ *
+ * Definitions for our own (vendor-specific) SVGA Escape commands.
+ */
+
+#ifndef _SVGA_ESCAPE_H_
+#define _SVGA_ESCAPE_H_
+
+
+/*
+ * Namespace IDs for the escape command
+ */
+
+#define SVGA_ESCAPE_NSID_VMWARE 0x00000000
+#define SVGA_ESCAPE_NSID_DEVEL 0xFFFFFFFF
+
+
+/*
+ * Within SVGA_ESCAPE_NSID_VMWARE, we multiplex commands according to
+ * the first DWORD of escape data (after the nsID and size). As a
+ * guideline we're using the high word and low word as a major and
+ * minor command number, respectively.
+ *
+ * Major command number allocation:
+ *
+ * 0000: Reserved
+ * 0001: SVGA_ESCAPE_VMWARE_LOG (svga_binary_logger.h)
+ * 0002: SVGA_ESCAPE_VMWARE_VIDEO (svga_overlay.h)
+ * 0003: SVGA_ESCAPE_VMWARE_HINT (svga_escape.h)
+ */
+
+#define SVGA_ESCAPE_VMWARE_MAJOR_MASK 0xFFFF0000
+
+
+/*
+ * SVGA Hint commands.
+ *
+ * These escapes let the SVGA driver provide optional information to
+ * he host about the state of the guest or guest applications. The
+ * host can use these hints to make user interface or performance
+ * decisions.
+ *
+ * Notes:
+ *
+ * - SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN is deprecated for guests
+ * that use the SVGA Screen Object extension. Instead of sending
+ * this escape, use the SVGA_SCREEN_FULLSCREEN_HINT flag on your
+ * Screen Object.
+ */
+
+#define SVGA_ESCAPE_VMWARE_HINT 0x00030000
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 // Deprecated
+
+typedef
+struct {
+ uint32 command;
+ uint32 fullscreen;
+ struct {
+ int32 x, y;
+ } monitorPosition;
+} SVGAEscapeHintFullscreen;
+
+#endif /* _SVGA_ESCAPE_H_ */
diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h
new file mode 100644
index 00000000000..82c1d3ff3e2
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -0,0 +1,201 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_overlay.h --
+ *
+ * Definitions for video-overlay support.
+ */
+
+#ifndef _SVGA_OVERLAY_H_
+#define _SVGA_OVERLAY_H_
+
+#include "svga_reg.h"
+
+/*
+ * Video formats we support
+ */
+
+#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
+#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
+#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+
+typedef enum {
+ SVGA_OVERLAY_FORMAT_INVALID = 0,
+ SVGA_OVERLAY_FORMAT_YV12 = VMWARE_FOURCC_YV12,
+ SVGA_OVERLAY_FORMAT_YUY2 = VMWARE_FOURCC_YUY2,
+ SVGA_OVERLAY_FORMAT_UYVY = VMWARE_FOURCC_UYVY,
+} SVGAOverlayFormat;
+
+#define SVGA_VIDEO_COLORKEY_MASK 0x00ffffff
+
+#define SVGA_ESCAPE_VMWARE_VIDEO 0x00020000
+
+#define SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS 0x00020001
+ /* FIFO escape layout:
+ * Type, Stream Id, (Register Id, Value) pairs */
+
+#define SVGA_ESCAPE_VMWARE_VIDEO_FLUSH 0x00020002
+ /* FIFO escape layout:
+ * Type, Stream Id */
+
+typedef
+struct SVGAEscapeVideoSetRegs {
+ struct {
+ uint32 cmdType;
+ uint32 streamId;
+ } header;
+
+ // May include zero or more items.
+ struct {
+ uint32 registerId;
+ uint32 value;
+ } items[1];
+} SVGAEscapeVideoSetRegs;
+
+typedef
+struct SVGAEscapeVideoFlush {
+ uint32 cmdType;
+ uint32 streamId;
+} SVGAEscapeVideoFlush;
+
+
+/*
+ * Struct definitions for the video overlay commands built on
+ * SVGAFifoCmdEscape.
+ */
+typedef
+struct {
+ uint32 command;
+ uint32 overlay;
+} SVGAFifoEscapeCmdVideoBase;
+
+typedef
+struct {
+ SVGAFifoEscapeCmdVideoBase videoCmd;
+} SVGAFifoEscapeCmdVideoFlush;
+
+typedef
+struct {
+ SVGAFifoEscapeCmdVideoBase videoCmd;
+ struct {
+ uint32 regId;
+ uint32 value;
+ } items[1];
+} SVGAFifoEscapeCmdVideoSetRegs;
+
+typedef
+struct {
+ SVGAFifoEscapeCmdVideoBase videoCmd;
+ struct {
+ uint32 regId;
+ uint32 value;
+ } items[SVGA_VIDEO_NUM_REGS];
+} SVGAFifoEscapeCmdVideoSetAllRegs;
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMwareVideoGetAttributes --
+ *
+ * Computes the size, pitches and offsets for YUV frames.
+ *
+ * Results:
+ * TRUE on success; otherwise FALSE on failure.
+ *
+ * Side effects:
+ * Pitches and offsets for the given YUV frame are put in 'pitches'
+ * and 'offsets' respectively. They are both optional though.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE Bool
+VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN
+ uint32 *width, // IN / OUT
+ uint32 *height, // IN / OUT
+ uint32 *size, // OUT
+ uint32 *pitches, // OUT (optional)
+ uint32 *offsets) // OUT (optional)
+{
+ int tmp;
+
+ *width = (*width + 1) & ~1;
+
+ if (offsets) {
+ offsets[0] = 0;
+ }
+
+ switch (format) {
+ case VMWARE_FOURCC_YV12:
+ *height = (*height + 1) & ~1;
+ *size = (*width + 3) & ~3;
+
+ if (pitches) {
+ pitches[0] = *size;
+ }
+
+ *size *= *height;
+
+ if (offsets) {
+ offsets[1] = *size;
+ }
+
+ tmp = ((*width >> 1) + 3) & ~3;
+
+ if (pitches) {
+ pitches[1] = pitches[2] = tmp;
+ }
+
+ tmp *= (*height >> 1);
+ *size += tmp;
+
+ if (offsets) {
+ offsets[2] = *size;
+ }
+
+ *size += tmp;
+ break;
+
+ case VMWARE_FOURCC_YUY2:
+ case VMWARE_FOURCC_UYVY:
+ *size = *width * 2;
+
+ if (pitches) {
+ pitches[0] = *size;
+ }
+
+ *size *= *height;
+ break;
+
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+#endif // _SVGA_OVERLAY_H_
diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h
new file mode 100644
index 00000000000..1b96c2ec07d
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_reg.h
@@ -0,0 +1,1346 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_reg.h --
+ *
+ * Virtual hardware definitions for the VMware SVGA II device.
+ */
+
+#ifndef _SVGA_REG_H_
+#define _SVGA_REG_H_
+
+/*
+ * PCI device IDs.
+ */
+#define PCI_VENDOR_ID_VMWARE 0x15AD
+#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405
+
+/*
+ * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
+ * cursor bypass mode. This is still supported, but no new guest
+ * drivers should use it.
+ */
+#define SVGA_CURSOR_ON_HIDE 0x0 /* Must be 0 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_SHOW 0x1 /* Must be 1 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_REMOVE_FROM_FB 0x2 /* Remove the cursor from the framebuffer because we need to see what's under it */
+#define SVGA_CURSOR_ON_RESTORE_TO_FB 0x3 /* Put the cursor back in the framebuffer so the user can see it */
+
+/*
+ * The maximum framebuffer size that can traced for e.g. guests in VESA mode.
+ * The changeMap in the monitor is proportional to this number. Therefore, we'd
+ * like to keep it as small as possible to reduce monitor overhead (using
+ * SVGA_VRAM_MAX_SIZE for this increases the size of the shared area by over
+ * 4k!).
+ *
+ * NB: For compatibility reasons, this value must be greater than 0xff0000.
+ * See bug 335072.
+ */
+#define SVGA_FB_MAX_TRACEABLE_SIZE 0x1000000
+
+#define SVGA_MAX_PSEUDOCOLOR_DEPTH 8
+#define SVGA_MAX_PSEUDOCOLORS (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH)
+#define SVGA_NUM_PALETTE_REGS (3 * SVGA_MAX_PSEUDOCOLORS)
+
+#define SVGA_MAGIC 0x900000UL
+#define SVGA_MAKE_ID(ver) (SVGA_MAGIC << 8 | (ver))
+
+/* Version 2 let the address of the frame buffer be unsigned on Win32 */
+#define SVGA_VERSION_2 2
+#define SVGA_ID_2 SVGA_MAKE_ID(SVGA_VERSION_2)
+
+/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so
+ PALETTE_BASE has moved */
+#define SVGA_VERSION_1 1
+#define SVGA_ID_1 SVGA_MAKE_ID(SVGA_VERSION_1)
+
+/* Version 0 is the initial version */
+#define SVGA_VERSION_0 0
+#define SVGA_ID_0 SVGA_MAKE_ID(SVGA_VERSION_0)
+
+/* "Invalid" value for all SVGA IDs. (Version ID, screen object ID, surface ID...) */
+#define SVGA_ID_INVALID 0xFFFFFFFF
+
+/* Port offsets, relative to BAR0 */
+#define SVGA_INDEX_PORT 0x0
+#define SVGA_VALUE_PORT 0x1
+#define SVGA_BIOS_PORT 0x2
+#define SVGA_IRQSTATUS_PORT 0x8
+
+/*
+ * Interrupt source flags for IRQSTATUS_PORT and IRQMASK.
+ *
+ * Interrupts are only supported when the
+ * SVGA_CAP_IRQMASK capability is present.
+ */
+#define SVGA_IRQFLAG_ANY_FENCE 0x1 /* Any fence was passed */
+#define SVGA_IRQFLAG_FIFO_PROGRESS 0x2 /* Made forward progress in the FIFO */
+#define SVGA_IRQFLAG_FENCE_GOAL 0x4 /* SVGA_FIFO_FENCE_GOAL reached */
+
+/*
+ * Registers
+ */
+
+enum {
+ SVGA_REG_ID = 0,
+ SVGA_REG_ENABLE = 1,
+ SVGA_REG_WIDTH = 2,
+ SVGA_REG_HEIGHT = 3,
+ SVGA_REG_MAX_WIDTH = 4,
+ SVGA_REG_MAX_HEIGHT = 5,
+ SVGA_REG_DEPTH = 6,
+ SVGA_REG_BITS_PER_PIXEL = 7, /* Current bpp in the guest */
+ SVGA_REG_PSEUDOCOLOR = 8,
+ SVGA_REG_RED_MASK = 9,
+ SVGA_REG_GREEN_MASK = 10,
+ SVGA_REG_BLUE_MASK = 11,
+ SVGA_REG_BYTES_PER_LINE = 12,
+ SVGA_REG_FB_START = 13, /* (Deprecated) */
+ SVGA_REG_FB_OFFSET = 14,
+ SVGA_REG_VRAM_SIZE = 15,
+ SVGA_REG_FB_SIZE = 16,
+
+ /* ID 0 implementation only had the above registers, then the palette */
+
+ SVGA_REG_CAPABILITIES = 17,
+ SVGA_REG_MEM_START = 18, /* (Deprecated) */
+ SVGA_REG_MEM_SIZE = 19,
+ SVGA_REG_CONFIG_DONE = 20, /* Set when memory area configured */
+ SVGA_REG_SYNC = 21, /* See "FIFO Synchronization Registers" */
+ SVGA_REG_BUSY = 22, /* See "FIFO Synchronization Registers" */
+ SVGA_REG_GUEST_ID = 23, /* Set guest OS identifier */
+ SVGA_REG_CURSOR_ID = 24, /* (Deprecated) */
+ SVGA_REG_CURSOR_X = 25, /* (Deprecated) */
+ SVGA_REG_CURSOR_Y = 26, /* (Deprecated) */
+ SVGA_REG_CURSOR_ON = 27, /* (Deprecated) */
+ SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */
+ SVGA_REG_SCRATCH_SIZE = 29, /* Number of scratch registers */
+ SVGA_REG_MEM_REGS = 30, /* Number of FIFO registers */
+ SVGA_REG_NUM_DISPLAYS = 31, /* (Deprecated) */
+ SVGA_REG_PITCHLOCK = 32, /* Fixed pitch for all modes */
+ SVGA_REG_IRQMASK = 33, /* Interrupt mask */
+
+ /* Legacy multi-monitor support */
+ SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */
+ SVGA_REG_DISPLAY_ID = 35, /* Display ID for the following display attributes */
+ SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */
+ SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */
+ SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */
+ SVGA_REG_DISPLAY_WIDTH = 39, /* The display's width */
+ SVGA_REG_DISPLAY_HEIGHT = 40, /* The display's height */
+
+ /* See "Guest memory regions" below. */
+ SVGA_REG_GMR_ID = 41,
+ SVGA_REG_GMR_DESCRIPTOR = 42,
+ SVGA_REG_GMR_MAX_IDS = 43,
+ SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,
+
+ SVGA_REG_TRACES = 45, /* Enable trace-based updates even when FIFO is on */
+ SVGA_REG_TOP = 46, /* Must be 1 more than the last register */
+
+ SVGA_PALETTE_BASE = 1024, /* Base of SVGA color map */
+ /* Next 768 (== 256*3) registers exist for colormap */
+
+ SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS
+ /* Base of scratch registers */
+ /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage:
+ First 4 are reserved for VESA BIOS Extension; any remaining are for
+ the use of the current SVGA driver. */
+};
+
+
+/*
+ * Guest memory regions (GMRs):
+ *
+ * This is a new memory mapping feature available in SVGA devices
+ * which have the SVGA_CAP_GMR bit set. Previously, there were two
+ * fixed memory regions available with which to share data between the
+ * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs
+ * are our name for an extensible way of providing arbitrary DMA
+ * buffers for use between the driver and the SVGA device. They are a
+ * new alternative to framebuffer memory, usable for both 2D and 3D
+ * graphics operations.
+ *
+ * Since GMR mapping must be done synchronously with guest CPU
+ * execution, we use a new pair of SVGA registers:
+ *
+ * SVGA_REG_GMR_ID --
+ *
+ * Read/write.
+ * This register holds the 32-bit ID (a small positive integer)
+ * of a GMR to create, delete, or redefine. Writing this register
+ * has no side-effects.
+ *
+ * SVGA_REG_GMR_DESCRIPTOR --
+ *
+ * Write-only.
+ * Writing this register will create, delete, or redefine the GMR
+ * specified by the above ID register. If this register is zero,
+ * the GMR is deleted. Any pointers into this GMR (including those
+ * currently being processed by FIFO commands) will be
+ * synchronously invalidated.
+ *
+ * If this register is nonzero, it must be the physical page
+ * number (PPN) of a data structure which describes the physical
+ * layout of the memory region this GMR should describe. The
+ * descriptor structure will be read synchronously by the SVGA
+ * device when this register is written. The descriptor need not
+ * remain allocated for the lifetime of the GMR.
+ *
+ * The guest driver should write SVGA_REG_GMR_ID first, then
+ * SVGA_REG_GMR_DESCRIPTOR.
+ *
+ * SVGA_REG_GMR_MAX_IDS --
+ *
+ * Read-only.
+ * The SVGA device may choose to support a maximum number of
+ * user-defined GMR IDs. This register holds the number of supported
+ * IDs. (The maximum supported ID plus 1)
+ *
+ * SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH --
+ *
+ * Read-only.
+ * The SVGA device may choose to put a limit on the total number
+ * of SVGAGuestMemDescriptor structures it will read when defining
+ * a single GMR.
+ *
+ * The descriptor structure is an array of SVGAGuestMemDescriptor
+ * structures. Each structure may do one of three things:
+ *
+ * - Terminate the GMR descriptor list.
+ * (ppn==0, numPages==0)
+ *
+ * - Add a PPN or range of PPNs to the GMR's virtual address space.
+ * (ppn != 0, numPages != 0)
+ *
+ * - Provide the PPN of the next SVGAGuestMemDescriptor, in order to
+ * support multi-page GMR descriptor tables without forcing the
+ * driver to allocate physically contiguous memory.
+ * (ppn != 0, numPages == 0)
+ *
+ * Note that each physical page of SVGAGuestMemDescriptor structures
+ * can describe at least 2MB of guest memory. If the driver needs to
+ * use more than one page of descriptor structures, it must use one of
+ * its SVGAGuestMemDescriptors to point to an additional page. The
+ * device will never automatically cross a page boundary.
+ *
+ * Once the driver has described a GMR, it is immediately available
+ * for use via any FIFO command that uses an SVGAGuestPtr structure.
+ * These pointers include a GMR identifier plus an offset into that
+ * GMR.
+ *
+ * The driver must check the SVGA_CAP_GMR bit before using the GMR
+ * registers.
+ */
+
+/*
+ * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer
+ * memory as well. In the future, these IDs could even be used to
+ * allow legacy memory regions to be redefined by the guest as GMRs.
+ *
+ * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA
+ * is being phased out. Please try to use user-defined GMRs whenever
+ * possible.
+ */
+#define SVGA_GMR_NULL ((uint32) -1)
+#define SVGA_GMR_FRAMEBUFFER ((uint32) -2) // Guest Framebuffer (GFB)
+
+typedef
+struct SVGAGuestMemDescriptor {
+ uint32 ppn;
+ uint32 numPages;
+} SVGAGuestMemDescriptor;
+
+typedef
+struct SVGAGuestPtr {
+ uint32 gmrId;
+ uint32 offset;
+} SVGAGuestPtr;
+
+
+/*
+ * SVGAGMRImageFormat --
+ *
+ * This is a packed representation of the source 2D image format
+ * for a GMR-to-screen blit. Currently it is defined as an encoding
+ * of the screen's color depth and bits-per-pixel, however, 16 bits
+ * are reserved for future use to identify other encodings (such as
+ * RGBA or higher-precision images).
+ *
+ * Currently supported formats:
+ *
+ * bpp depth Format Name
+ * --- ----- -----------
+ * 32 24 32-bit BGRX
+ * 24 24 24-bit BGR
+ * 16 16 RGB 5-6-5
+ * 16 15 RGB 5-5-5
+ *
+ */
+
+typedef
+struct SVGAGMRImageFormat {
+ union {
+ struct {
+ uint32 bitsPerPixel : 8;
+ uint32 colorDepth : 8;
+ uint32 reserved : 16; // Must be zero
+ };
+
+ uint32 value;
+ };
+} SVGAGMRImageFormat;
+
+/*
+ * SVGAColorBGRX --
+ *
+ * A 24-bit color format (BGRX), which does not depend on the
+ * format of the legacy guest framebuffer (GFB) or the current
+ * GMRFB state.
+ */
+
+typedef
+struct SVGAColorBGRX {
+ union {
+ struct {
+ uint32 b : 8;
+ uint32 g : 8;
+ uint32 r : 8;
+ uint32 x : 8; // Unused
+ };
+
+ uint32 value;
+ };
+} SVGAColorBGRX;
+
+
+/*
+ * SVGASignedRect --
+ * SVGASignedPoint --
+ *
+ * Signed rectangle and point primitives. These are used by the new
+ * 2D primitives for drawing to Screen Objects, which can occupy a
+ * signed virtual coordinate space.
+ *
+ * SVGASignedRect specifies a half-open interval: the (left, top)
+ * pixel is part of the rectangle, but the (right, bottom) pixel is
+ * not.
+ */
+
+typedef
+struct SVGASignedRect {
+ int32 left;
+ int32 top;
+ int32 right;
+ int32 bottom;
+} SVGASignedRect;
+
+typedef
+struct SVGASignedPoint {
+ int32 x;
+ int32 y;
+} SVGASignedPoint;
+
+
+/*
+ * Capabilities
+ *
+ * Note the holes in the bitfield. Missing bits have been deprecated,
+ * and must not be reused. Those capabilities will never be reported
+ * by new versions of the SVGA device.
+ */
+
+#define SVGA_CAP_NONE 0x00000000
+#define SVGA_CAP_RECT_COPY 0x00000002
+#define SVGA_CAP_CURSOR 0x00000020
+#define SVGA_CAP_CURSOR_BYPASS 0x00000040 // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_8BIT_EMULATION 0x00000100
+#define SVGA_CAP_ALPHA_CURSOR 0x00000200
+#define SVGA_CAP_3D 0x00004000
+#define SVGA_CAP_EXTENDED_FIFO 0x00008000
+#define SVGA_CAP_MULTIMON 0x00010000 // Legacy multi-monitor support
+#define SVGA_CAP_PITCHLOCK 0x00020000
+#define SVGA_CAP_IRQMASK 0x00040000
+#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 // Legacy multi-monitor support
+#define SVGA_CAP_GMR 0x00100000
+#define SVGA_CAP_TRACES 0x00200000
+
+
+/*
+ * FIFO register indices.
+ *
+ * The FIFO is a chunk of device memory mapped into guest physmem. It
+ * is always treated as 32-bit words.
+ *
+ * The guest driver gets to decide how to partition it between
+ * - FIFO registers (there are always at least 4, specifying where the
+ * following data area is and how much data it contains; there may be
+ * more registers following these, depending on the FIFO protocol
+ * version in use)
+ * - FIFO data, written by the guest and slurped out by the VMX.
+ * These indices are 32-bit word offsets into the FIFO.
+ */
+
+enum {
+ /*
+ * Block 1 (basic registers): The originally defined FIFO registers.
+ * These exist and are valid for all versions of the FIFO protocol.
+ */
+
+ SVGA_FIFO_MIN = 0,
+ SVGA_FIFO_MAX, /* The distance from MIN to MAX must be at least 10K */
+ SVGA_FIFO_NEXT_CMD,
+ SVGA_FIFO_STOP,
+
+ /*
+ * Block 2 (extended registers): Mandatory registers for the extended
+ * FIFO. These exist if the SVGA caps register includes
+ * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their
+ * associated capability bit is enabled.
+ *
+ * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied
+ * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE.
+ * This means that the guest has to test individually (in most cases
+ * using FIFO caps) for the presence of registers after this; the VMX
+ * can define "extended FIFO" to mean whatever it wants, and currently
+ * won't enable it unless there's room for that set and much more.
+ */
+
+ SVGA_FIFO_CAPABILITIES = 4,
+ SVGA_FIFO_FLAGS,
+ // Valid with SVGA_FIFO_CAP_FENCE:
+ SVGA_FIFO_FENCE,
+
+ /*
+ * Block 3a (optional extended registers): Additional registers for the
+ * extended FIFO, whose presence isn't actually implied by
+ * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to
+ * leave room for them.
+ *
+ * These in block 3a, the VMX currently considers mandatory for the
+ * extended FIFO.
+ */
+
+ // Valid if exists (i.e. if extended FIFO enabled):
+ SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */
+ // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+ SVGA_FIFO_PITCHLOCK,
+
+ // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+ SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */
+ SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */
+ SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */
+ SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */
+ SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
+
+ // Valid with SVGA_FIFO_CAP_RESERVE:
+ SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */
+
+ /*
+ * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+ *
+ * By default this is SVGA_ID_INVALID, to indicate that the cursor
+ * coordinates are specified relative to the virtual root. If this
+ * is set to a specific screen ID, cursor position is reinterpreted
+ * as a signed offset relative to that screen's origin. This is the
+ * only way to place the cursor on a non-rooted screen.
+ */
+ SVGA_FIFO_CURSOR_SCREEN_ID,
+
+ /*
+ * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
+ * registers, but this must be done carefully and with judicious use of
+ * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
+ * enough to tell you whether the register exists: we've shipped drivers
+ * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of
+ * the earlier ones. The actual order of introduction was:
+ * - PITCHLOCK
+ * - 3D_CAPS
+ * - CURSOR_* (cursor bypass 3)
+ * - RESERVED
+ * So, code that wants to know whether it can use any of the
+ * aforementioned registers, or anything else added after PITCHLOCK and
+ * before 3D_CAPS, needs to reason about something other than
+ * SVGA_FIFO_MIN.
+ */
+
+ /*
+ * 3D caps block space; valid with 3D hardware version >=
+ * SVGA3D_HWVERSION_WS6_B1.
+ */
+ SVGA_FIFO_3D_CAPS = 32,
+ SVGA_FIFO_3D_CAPS_LAST = 32 + 255,
+
+ /*
+ * End of VMX's current definition of "extended-FIFO registers".
+ * Registers before here are always enabled/disabled as a block; either
+ * the extended FIFO is enabled and includes all preceding registers, or
+ * it's disabled entirely.
+ *
+ * Block 3b (truly optional extended registers): Additional registers for
+ * the extended FIFO, which the VMX already knows how to enable and
+ * disable with correct granularity.
+ *
+ * Registers after here exist if and only if the guest SVGA driver
+ * sets SVGA_FIFO_MIN high enough to leave room for them.
+ */
+
+ // Valid if register exists:
+ SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
+ SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
+ SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */
+
+ /*
+ * Always keep this last. This defines the maximum number of
+ * registers we know about. At power-on, this value is placed in
+ * the SVGA_REG_MEM_REGS register, and we expect the guest driver
+ * to allocate this much space in FIFO memory for registers.
+ */
+ SVGA_FIFO_NUM_REGS
+};
+
+
+/*
+ * Definition of registers included in extended FIFO support.
+ *
+ * The guest SVGA driver gets to allocate the FIFO between registers
+ * and data. It must always allocate at least 4 registers, but old
+ * drivers stopped there.
+ *
+ * The VMX will enable extended FIFO support if and only if the guest
+ * left enough room for all registers defined as part of the mandatory
+ * set for the extended FIFO.
+ *
+ * Note that the guest drivers typically allocate the FIFO only at
+ * initialization time, not at mode switches, so it's likely that the
+ * number of FIFO registers won't change without a reboot.
+ *
+ * All registers less than this value are guaranteed to be present if
+ * svgaUser->fifo.extended is set. Any later registers must be tested
+ * individually for compatibility at each use (in the VMX).
+ *
+ * This value is used only by the VMX, so it can change without
+ * affecting driver compatibility; keep it that way?
+ */
+#define SVGA_FIFO_EXTENDED_MANDATORY_REGS (SVGA_FIFO_3D_CAPS_LAST + 1)
+
+
+/*
+ * FIFO Synchronization Registers
+ *
+ * This explains the relationship between the various FIFO
+ * sync-related registers in IOSpace and in FIFO space.
+ *
+ * SVGA_REG_SYNC --
+ *
+ * The SYNC register can be used in two different ways by the guest:
+ *
+ * 1. If the guest wishes to fully sync (drain) the FIFO,
+ * it will write once to SYNC then poll on the BUSY
+ * register. The FIFO is sync'ed once BUSY is zero.
+ *
+ * 2. If the guest wants to asynchronously wake up the host,
+ * it will write once to SYNC without polling on BUSY.
+ * Ideally it will do this after some new commands have
+ * been placed in the FIFO, and after reading a zero
+ * from SVGA_FIFO_BUSY.
+ *
+ * (1) is the original behaviour that SYNC was designed to
+ * support. Originally, a write to SYNC would implicitly
+ * trigger a read from BUSY. This causes us to synchronously
+ * process the FIFO.
+ *
+ * This behaviour has since been changed so that writing SYNC
+ * will *not* implicitly cause a read from BUSY. Instead, it
+ * makes a channel call which asynchronously wakes up the MKS
+ * thread.
+ *
+ * New guests can use this new behaviour to implement (2)
+ * efficiently. This lets guests get the host's attention
+ * without waiting for the MKS to poll, which gives us much
+ * better CPU utilization on SMP hosts and on UP hosts while
+ * we're blocked on the host GPU.
+ *
+ * Old guests shouldn't notice the behaviour change. SYNC was
+ * never guaranteed to process the entire FIFO, since it was
+ * bounded to a particular number of CPU cycles. Old guests will
+ * still loop on the BUSY register until the FIFO is empty.
+ *
+ * Writing to SYNC currently has the following side-effects:
+ *
+ * - Sets SVGA_REG_BUSY to TRUE (in the monitor)
+ * - Asynchronously wakes up the MKS thread for FIFO processing
+ * - The value written to SYNC is recorded as a "reason", for
+ * stats purposes.
+ *
+ * If SVGA_FIFO_BUSY is available, drivers are advised to only
+ * write to SYNC if SVGA_FIFO_BUSY is FALSE. Drivers should set
+ * SVGA_FIFO_BUSY to TRUE after writing to SYNC. The MKS will
+ * eventually set SVGA_FIFO_BUSY on its own, but this approach
+ * lets the driver avoid sending multiple asynchronous wakeup
+ * messages to the MKS thread.
+ *
+ * SVGA_REG_BUSY --
+ *
+ * This register is set to TRUE when SVGA_REG_SYNC is written,
+ * and it reads as FALSE when the FIFO has been completely
+ * drained.
+ *
+ * Every read from this register causes us to synchronously
+ * process FIFO commands. There is no guarantee as to how many
+ * commands each read will process.
+ *
+ * CPU time spent processing FIFO commands will be billed to
+ * the guest.
+ *
+ * New drivers should avoid using this register unless they
+ * need to guarantee that the FIFO is completely drained. It
+ * is overkill for performing a sync-to-fence. Older drivers
+ * will use this register for any type of synchronization.
+ *
+ * SVGA_FIFO_BUSY --
+ *
+ * This register is a fast way for the guest driver to check
+ * whether the FIFO is already being processed. It reads and
+ * writes at normal RAM speeds, with no monitor intervention.
+ *
+ * If this register reads as TRUE, the host is guaranteeing that
+ * any new commands written into the FIFO will be noticed before
+ * the MKS goes back to sleep.
+ *
+ * If this register reads as FALSE, no such guarantee can be
+ * made.
+ *
+ * The guest should use this register to quickly determine
+ * whether or not it needs to wake up the host. If the guest
+ * just wrote a command or group of commands that it would like
+ * the host to begin processing, it should:
+ *
+ * 1. Read SVGA_FIFO_BUSY. If it reads as TRUE, no further
+ * action is necessary.
+ *
+ * 2. Write TRUE to SVGA_FIFO_BUSY. This informs future guest
+ * code that we've already sent a SYNC to the host and we
+ * don't need to send a duplicate.
+ *
+ * 3. Write a reason to SVGA_REG_SYNC. This will send an
+ * asynchronous wakeup to the MKS thread.
+ */
+
+
+/*
+ * FIFO Capabilities
+ *
+ * Fence -- Fence register and command are supported
+ * Accel Front -- Front buffer only commands are supported
+ * Pitch Lock -- Pitch lock register is supported
+ * Video -- SVGA Video overlay units are supported
+ * Escape -- Escape command is supported
+ *
+ * XXX: Add longer descriptions for each capability, including a list
+ * of the new features that each capability provides.
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT --
+ *
+ * Provides dynamic multi-screen rendering, for improved Unity and
+ * multi-monitor modes. With Screen Object, the guest can
+ * dynamically create and destroy 'screens', which can represent
+ * Unity windows or virtual monitors. Screen Object also provides
+ * strong guarantees that DMA operations happen only when
+ * guest-initiated. Screen Object deprecates the BAR1 guest
+ * framebuffer (GFB) and all commands that work only with the GFB.
+ *
+ * New registers:
+ * FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID
+ *
+ * New 2D commands:
+ * DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN,
+ * BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY
+ *
+ * New 3D commands:
+ * BLIT_SURFACE_TO_SCREEN
+ *
+ * New guarantees:
+ *
+ * - The host will not read or write guest memory, including the GFB,
+ * except when explicitly initiated by a DMA command.
+ *
+ * - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK,
+ * is guaranteed to complete before any subsequent FENCEs.
+ *
+ * - All legacy commands which affect a Screen (UPDATE, PRESENT,
+ * PRESENT_READBACK) as well as new Screen blit commands will
+ * all behave consistently as blits, and memory will be read
+ * or written in FIFO order.
+ *
+ * For example, if you PRESENT from one SVGA3D surface to multiple
+ * places on the screen, the data copied will always be from the
+ * SVGA3D surface at the time the PRESENT was issued in the FIFO.
+ * This was not necessarily true on devices without Screen Object.
+ *
+ * This means that on devices that support Screen Object, the
+ * PRESENT_READBACK command should not be necessary unless you
+ * actually want to read back the results of 3D rendering into
+ * system memory. (And for that, the BLIT_SCREEN_TO_GMRFB
+ * command provides a strict superset of functionality.)
+ *
+ * - When a screen is resized, either using Screen Object commands or
+ * legacy multimon registers, its contents are preserved.
+ */
+
+#define SVGA_FIFO_CAP_NONE 0
+#define SVGA_FIFO_CAP_FENCE (1<<0)
+#define SVGA_FIFO_CAP_ACCELFRONT (1<<1)
+#define SVGA_FIFO_CAP_PITCHLOCK (1<<2)
+#define SVGA_FIFO_CAP_VIDEO (1<<3)
+#define SVGA_FIFO_CAP_CURSOR_BYPASS_3 (1<<4)
+#define SVGA_FIFO_CAP_ESCAPE (1<<5)
+#define SVGA_FIFO_CAP_RESERVE (1<<6)
+#define SVGA_FIFO_CAP_SCREEN_OBJECT (1<<7)
+
+
+/*
+ * FIFO Flags
+ *
+ * Accel Front -- Driver should use front buffer only commands
+ */
+
+#define SVGA_FIFO_FLAG_NONE 0
+#define SVGA_FIFO_FLAG_ACCELFRONT (1<<0)
+#define SVGA_FIFO_FLAG_RESERVED (1<<31) // Internal use only
+
+/*
+ * FIFO reservation sentinel value
+ */
+
+#define SVGA_FIFO_RESERVED_UNKNOWN 0xffffffff
+
+
+/*
+ * Video overlay support
+ */
+
+#define SVGA_NUM_OVERLAY_UNITS 32
+
+
+/*
+ * Video capabilities that the guest is currently using
+ */
+
+#define SVGA_VIDEO_FLAG_COLORKEY 0x0001
+
+
+/*
+ * Offsets for the video overlay registers
+ */
+
+enum {
+ SVGA_VIDEO_ENABLED = 0,
+ SVGA_VIDEO_FLAGS,
+ SVGA_VIDEO_DATA_OFFSET,
+ SVGA_VIDEO_FORMAT,
+ SVGA_VIDEO_COLORKEY,
+ SVGA_VIDEO_SIZE, // Deprecated
+ SVGA_VIDEO_WIDTH,
+ SVGA_VIDEO_HEIGHT,
+ SVGA_VIDEO_SRC_X,
+ SVGA_VIDEO_SRC_Y,
+ SVGA_VIDEO_SRC_WIDTH,
+ SVGA_VIDEO_SRC_HEIGHT,
+ SVGA_VIDEO_DST_X, // Signed int32
+ SVGA_VIDEO_DST_Y, // Signed int32
+ SVGA_VIDEO_DST_WIDTH,
+ SVGA_VIDEO_DST_HEIGHT,
+ SVGA_VIDEO_PITCH_1,
+ SVGA_VIDEO_PITCH_2,
+ SVGA_VIDEO_PITCH_3,
+ SVGA_VIDEO_DATA_GMRID, // Optional, defaults to SVGA_GMR_FRAMEBUFFER
+ SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+ SVGA_VIDEO_NUM_REGS
+};
+
+
+/*
+ * SVGA Overlay Units
+ *
+ * width and height relate to the entire source video frame.
+ * srcX, srcY, srcWidth and srcHeight represent subset of the source
+ * video frame to be displayed.
+ */
+
+typedef struct SVGAOverlayUnit {
+ uint32 enabled;
+ uint32 flags;
+ uint32 dataOffset;
+ uint32 format;
+ uint32 colorKey;
+ uint32 size;
+ uint32 width;
+ uint32 height;
+ uint32 srcX;
+ uint32 srcY;
+ uint32 srcWidth;
+ uint32 srcHeight;
+ int32 dstX;
+ int32 dstY;
+ uint32 dstWidth;
+ uint32 dstHeight;
+ uint32 pitches[3];
+ uint32 dataGMRId;
+ uint32 dstScreenId;
+} SVGAOverlayUnit;
+
+
+/*
+ * SVGAScreenObject --
+ *
+ * This is a new way to represent a guest's multi-monitor screen or
+ * Unity window. Screen objects are only supported if the
+ * SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set.
+ *
+ * If Screen Objects are supported, they can be used to fully
+ * replace the functionality provided by the framebuffer registers
+ * (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY.
+ *
+ * The screen object is a struct with guaranteed binary
+ * compatibility. New flags can be added, and the struct may grow,
+ * but existing fields must retain their meaning.
+ *
+ */
+
+#define SVGA_SCREEN_HAS_ROOT (1 << 0) // Screen is present in the virtual coord space
+#define SVGA_SCREEN_IS_PRIMARY (1 << 1) // Guest considers this screen to be 'primary'
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) // Guest is running a fullscreen app here
+
+typedef
+struct SVGAScreenObject {
+ uint32 structSize; // sizeof(SVGAScreenObject)
+ uint32 id;
+ uint32 flags;
+ struct {
+ uint32 width;
+ uint32 height;
+ } size;
+ struct {
+ int32 x;
+ int32 y;
+ } root; // Only used if SVGA_SCREEN_HAS_ROOT is set.
+} SVGAScreenObject;
+
+
+/*
+ * Commands in the command FIFO:
+ *
+ * Command IDs defined below are used for the traditional 2D FIFO
+ * communication (not all commands are available for all versions of the
+ * SVGA FIFO protocol).
+ *
+ * Note the holes in the command ID numbers: These commands have been
+ * deprecated, and the old IDs must not be reused.
+ *
+ * Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ * protocol.
+ *
+ * Each command's parameters are described by the comments and
+ * structs below.
+ */
+
+typedef enum {
+ SVGA_CMD_INVALID_CMD = 0,
+ SVGA_CMD_UPDATE = 1,
+ SVGA_CMD_RECT_COPY = 3,
+ SVGA_CMD_DEFINE_CURSOR = 19,
+ SVGA_CMD_DEFINE_ALPHA_CURSOR = 22,
+ SVGA_CMD_UPDATE_VERBOSE = 25,
+ SVGA_CMD_FRONT_ROP_FILL = 29,
+ SVGA_CMD_FENCE = 30,
+ SVGA_CMD_ESCAPE = 33,
+ SVGA_CMD_DEFINE_SCREEN = 34,
+ SVGA_CMD_DESTROY_SCREEN = 35,
+ SVGA_CMD_DEFINE_GMRFB = 36,
+ SVGA_CMD_BLIT_GMRFB_TO_SCREEN = 37,
+ SVGA_CMD_BLIT_SCREEN_TO_GMRFB = 38,
+ SVGA_CMD_ANNOTATION_FILL = 39,
+ SVGA_CMD_ANNOTATION_COPY = 40,
+ SVGA_CMD_MAX
+} SVGAFifoCmdId;
+
+#define SVGA_CMD_MAX_ARGS 64
+
+
+/*
+ * SVGA_CMD_UPDATE --
+ *
+ * This is a DMA transfer which copies from the Guest Framebuffer
+ * (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which
+ * intersect with the provided virtual rectangle.
+ *
+ * This command does not support using arbitrary guest memory as a
+ * data source- it only works with the pre-defined GFB memory.
+ * This command also does not support signed virtual coordinates.
+ * If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with
+ * negative root x/y coordinates, the negative portion of those
+ * screens will not be reachable by this command.
+ *
+ * This command is not necessary when using framebuffer
+ * traces. Traces are automatically enabled if the SVGA FIFO is
+ * disabled, and you may explicitly enable/disable traces using
+ * SVGA_REG_TRACES. With traces enabled, any write to the GFB will
+ * automatically act as if a subsequent SVGA_CMD_UPDATE was issued.
+ *
+ * Traces and SVGA_CMD_UPDATE are the only supported ways to render
+ * pseudocolor screen updates. The newer Screen Object commands
+ * only support true color formats.
+ *
+ * Availability:
+ * Always available.
+ */
+
+typedef
+struct {
+ uint32 x;
+ uint32 y;
+ uint32 width;
+ uint32 height;
+} SVGAFifoCmdUpdate;
+
+
+/*
+ * SVGA_CMD_RECT_COPY --
+ *
+ * Perform a rectangular DMA transfer from one area of the GFB to
+ * another, and copy the result to any screens which intersect it.
+ *
+ * Availability:
+ * SVGA_CAP_RECT_COPY
+ */
+
+typedef
+struct {
+ uint32 srcX;
+ uint32 srcY;
+ uint32 destX;
+ uint32 destY;
+ uint32 width;
+ uint32 height;
+} SVGAFifoCmdRectCopy;
+
+
+/*
+ * SVGA_CMD_DEFINE_CURSOR --
+ *
+ * Provide a new cursor image, as an AND/XOR mask.
+ *
+ * The recommended way to position the cursor overlay is by using
+ * the SVGA_FIFO_CURSOR_* registers, supported by the
+ * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ * SVGA_CAP_CURSOR
+ */
+
+typedef
+struct {
+ uint32 id; // Reserved, must be zero.
+ uint32 hotspotX;
+ uint32 hotspotY;
+ uint32 width;
+ uint32 height;
+ uint32 andMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL
+ uint32 xorMaskDepth; // Value must be 1 or equal to BITS_PER_PIXEL
+ /*
+ * Followed by scanline data for AND mask, then XOR mask.
+ * Each scanline is padded to a 32-bit boundary.
+ */
+} SVGAFifoCmdDefineCursor;
+
+
+/*
+ * SVGA_CMD_DEFINE_ALPHA_CURSOR --
+ *
+ * Provide a new cursor image, in 32-bit BGRA format.
+ *
+ * The recommended way to position the cursor overlay is by using
+ * the SVGA_FIFO_CURSOR_* registers, supported by the
+ * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ * SVGA_CAP_ALPHA_CURSOR
+ */
+
+typedef
+struct {
+ uint32 id; // Reserved, must be zero.
+ uint32 hotspotX;
+ uint32 hotspotY;
+ uint32 width;
+ uint32 height;
+ /* Followed by scanline data */
+} SVGAFifoCmdDefineAlphaCursor;
+
+
+/*
+ * SVGA_CMD_UPDATE_VERBOSE --
+ *
+ * Just like SVGA_CMD_UPDATE, but also provide a per-rectangle
+ * 'reason' value, an opaque cookie which is used by internal
+ * debugging tools. Third party drivers should not use this
+ * command.
+ *
+ * Availability:
+ * SVGA_CAP_EXTENDED_FIFO
+ */
+
+typedef
+struct {
+ uint32 x;
+ uint32 y;
+ uint32 width;
+ uint32 height;
+ uint32 reason;
+} SVGAFifoCmdUpdateVerbose;
+
+
+/*
+ * SVGA_CMD_FRONT_ROP_FILL --
+ *
+ * This is a hint which tells the SVGA device that the driver has
+ * just filled a rectangular region of the GFB with a solid
+ * color. Instead of reading these pixels from the GFB, the device
+ * can assume that they all equal 'color'. This is primarily used
+ * for remote desktop protocols.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_ACCELFRONT
+ */
+
+#define SVGA_ROP_COPY 0x03
+
+typedef
+struct {
+ uint32 color; // In the same format as the GFB
+ uint32 x;
+ uint32 y;
+ uint32 width;
+ uint32 height;
+ uint32 rop; // Must be SVGA_ROP_COPY
+} SVGAFifoCmdFrontRopFill;
+
+
+/*
+ * SVGA_CMD_FENCE --
+ *
+ * Insert a synchronization fence. When the SVGA device reaches
+ * this command, it will copy the 'fence' value into the
+ * SVGA_FIFO_FENCE register. It will also compare the fence against
+ * SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the
+ * SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will
+ * raise this interrupt.
+ *
+ * Availability:
+ * SVGA_FIFO_FENCE for this command,
+ * SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL.
+ */
+
+typedef
+struct {
+ uint32 fence;
+} SVGAFifoCmdFence;
+
+
+/*
+ * SVGA_CMD_ESCAPE --
+ *
+ * Send an extended or vendor-specific variable length command.
+ * This is used for video overlay, third party plugins, and
+ * internal debugging tools. See svga_escape.h
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_ESCAPE
+ */
+
+typedef
+struct {
+ uint32 nsid;
+ uint32 size;
+ /* followed by 'size' bytes of data */
+} SVGAFifoCmdEscape;
+
+
+/*
+ * SVGA_CMD_DEFINE_SCREEN --
+ *
+ * Define or redefine an SVGAScreenObject. See the description of
+ * SVGAScreenObject above. The video driver is responsible for
+ * generating new screen IDs. They should be small positive
+ * integers. The virtual device will have an implementation
+ * specific upper limit on the number of screen IDs
+ * supported. Drivers are responsible for recycling IDs. The first
+ * valid ID is zero.
+ *
+ * - Interaction with other registers:
+ *
+ * For backwards compatibility, when the GFB mode registers (WIDTH,
+ * HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device
+ * deletes all screens other than screen #0, and redefines screen
+ * #0 according to the specified mode. Drivers that use
+ * SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0.
+ *
+ * If you use screen objects, do not use the legacy multi-mon
+ * registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGAScreenObject screen; // Variable-length according to version
+} SVGAFifoCmdDefineScreen;
+
+
+/*
+ * SVGA_CMD_DESTROY_SCREEN --
+ *
+ * Destroy an SVGAScreenObject. Its ID is immediately available for
+ * re-use.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ uint32 screenId;
+} SVGAFifoCmdDestroyScreen;
+
+
+/*
+ * SVGA_CMD_DEFINE_GMRFB --
+ *
+ * This command sets a piece of SVGA device state called the
+ * Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a
+ * piece of light-weight state which identifies the location and
+ * format of an image in guest memory or in BAR1. The GMRFB has
+ * an arbitrary size, and it doesn't need to match the geometry
+ * of the GFB or any screen object.
+ *
+ * The GMRFB can be redefined as often as you like. You could
+ * always use the same GMRFB, you could redefine it before
+ * rendering from a different guest screen, or you could even
+ * redefine it before every blit.
+ *
+ * There are multiple ways to use this command. The simplest way is
+ * to use it to move the framebuffer either to elsewhere in the GFB
+ * (BAR1) memory region, or to a user-defined GMR. This lets a
+ * driver use a framebuffer allocated entirely out of normal system
+ * memory, which we encourage.
+ *
+ * Another way to use this command is to set up a ring buffer of
+ * updates in GFB memory. If a driver wants to ensure that no
+ * frames are skipped by the SVGA device, it is important that the
+ * driver not modify the source data for a blit until the device is
+ * done processing the command. One efficient way to accomplish
+ * this is to use a ring of small DMA buffers. Each buffer is used
+ * for one blit, then we move on to the next buffer in the
+ * ring. The FENCE mechanism is used to protect each buffer from
+ * re-use until the device is finished with that buffer's
+ * corresponding blit.
+ *
+ * This command does not affect the meaning of SVGA_CMD_UPDATE.
+ * UPDATEs always occur from the legacy GFB memory area. This
+ * command has no support for pseudocolor GMRFBs. Currently only
+ * true-color 15, 16, and 24-bit depths are supported. Future
+ * devices may expose capabilities for additional framebuffer
+ * formats.
+ *
+ * The default GMRFB value is undefined. Drivers must always send
+ * this command at least once before performing any blit from the
+ * GMRFB.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGAGuestPtr ptr;
+ uint32 bytesPerLine;
+ SVGAGMRImageFormat format;
+} SVGAFifoCmdDefineGMRFB;
+
+
+/*
+ * SVGA_CMD_BLIT_GMRFB_TO_SCREEN --
+ *
+ * This is a guest-to-host blit. It performs a DMA operation to
+ * copy a rectangular region of pixels from the current GMRFB to
+ * one or more Screen Objects.
+ *
+ * The destination coordinate may be specified relative to a
+ * screen's origin (if a screen ID is specified) or relative to the
+ * virtual coordinate system's origin (if the screen ID is
+ * SVGA_ID_INVALID). The actual destination may span zero or more
+ * screens, in the case of a virtual destination rect or a rect
+ * which extends off the edge of the specified screen.
+ *
+ * This command writes to the screen's "base layer": the underlying
+ * framebuffer which exists below any cursor or video overlays. No
+ * action is necessary to explicitly hide or update any overlays
+ * which exist on top of the updated region.
+ *
+ * The SVGA device is guaranteed to finish reading from the GMRFB
+ * by the time any subsequent FENCE commands are reached.
+ *
+ * This command consumes an annotation. See the
+ * SVGA_CMD_ANNOTATION_* commands for details.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGASignedPoint srcOrigin;
+ SVGASignedRect destRect;
+ uint32 destScreenId;
+} SVGAFifoCmdBlitGMRFBToScreen;
+
+
+/*
+ * SVGA_CMD_BLIT_SCREEN_TO_GMRFB --
+ *
+ * This is a host-to-guest blit. It performs a DMA operation to
+ * copy a rectangular region of pixels from a single Screen Object
+ * back to the current GMRFB.
+ *
+ * Usage note: This command should be used rarely. It will
+ * typically be inefficient, but it is necessary for some types of
+ * synchronization between 3D (GPU) and 2D (CPU) rendering into
+ * overlapping areas of a screen.
+ *
+ * The source coordinate is specified relative to a screen's
+ * origin. The provided screen ID must be valid. If any parameters
+ * are invalid, the resulting pixel values are undefined.
+ *
+ * This command reads the screen's "base layer". Overlays like
+ * video and cursor are not included, but any data which was sent
+ * using a blit-to-screen primitive will be available, no matter
+ * whether the data's original source was the GMRFB or the 3D
+ * acceleration hardware.
+ *
+ * Note that our guest-to-host blits and host-to-guest blits aren't
+ * symmetric in their current implementation. While the parameters
+ * are identical, host-to-guest blits are a lot less featureful.
+ * They do not support clipping: If the source parameters don't
+ * fully fit within a screen, the blit fails. They must originate
+ * from exactly one screen. Virtual coordinates are not directly
+ * supported.
+ *
+ * Host-to-guest blits do support the same set of GMRFB formats
+ * offered by guest-to-host blits.
+ *
+ * The SVGA device is guaranteed to finish writing to the GMRFB by
+ * the time any subsequent FENCE commands are reached.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGASignedPoint destOrigin;
+ SVGASignedRect srcRect;
+ uint32 srcScreenId;
+} SVGAFifoCmdBlitScreenToGMRFB;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_FILL --
+ *
+ * This is a blit annotation. This command stores a small piece of
+ * device state which is consumed by the next blit-to-screen
+ * command. The state is only cleared by commands which are
+ * specifically documented as consuming an annotation. Other
+ * commands (such as ESCAPEs for debugging) may intervene between
+ * the annotation and its associated blit.
+ *
+ * This annotation is a promise about the contents of the next
+ * blit: The video driver is guaranteeing that all pixels in that
+ * blit will have the same value, specified here as a color in
+ * SVGAColorBGRX format.
+ *
+ * The SVGA device can still render the blit correctly even if it
+ * ignores this annotation, but the annotation may allow it to
+ * perform the blit more efficiently, for example by ignoring the
+ * source data and performing a fill in hardware.
+ *
+ * This annotation is most important for performance when the
+ * user's display is being remoted over a network connection.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGAColorBGRX color;
+} SVGAFifoCmdAnnotationFill;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_COPY --
+ *
+ * This is a blit annotation. See SVGA_CMD_ANNOTATION_FILL for more
+ * information about annotations.
+ *
+ * This annotation is a promise about the contents of the next
+ * blit: The video driver is guaranteeing that all pixels in that
+ * blit will have the same value as those which already exist at an
+ * identically-sized region on the same or a different screen.
+ *
+ * Note that the source pixels for the COPY in this annotation are
+ * sampled before applying the anqnotation's associated blit. They
+ * are allowed to overlap with the blit's destination pixels.
+ *
+ * The copy source rectangle is specified the same way as the blit
+ * destination: it can be a rectangle which spans zero or more
+ * screens, specified relative to either a screen or to the virtual
+ * coordinate system's origin. If the source rectangle includes
+ * pixels which are not from exactly one screen, the results are
+ * undefined.
+ *
+ * Availability:
+ * SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+ SVGASignedPoint srcOrigin;
+ uint32 srcScreenId;
+} SVGAFifoCmdAnnotationCopy;
+
+#endif
diff --git a/src/gallium/drivers/svga/include/svga_types.h b/src/gallium/drivers/svga/include/svga_types.h
new file mode 100644
index 00000000000..7fd9bab03a5
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_types.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef _SVGA_TYPES_H_
+#define _SVGA_TYPES_H_
+
+#include "pipe/p_compiler.h"
+
+typedef int64_t int64;
+typedef uint64_t uint64;
+
+typedef int32_t int32;
+typedef uint32_t uint32;
+
+typedef int16_t int16;
+typedef uint16_t uint16;
+
+typedef int8_t int8;
+typedef uint8_t uint8;
+
+typedef uint8_t Bool;
+
+#endif /* _SVGA_TYPES_H_ */
+
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
new file mode 100644
index 00000000000..a0da7d7e5d5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -0,0 +1,1427 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * svga_cmd.c --
+ *
+ * Command construction utility for the SVGA3D protocol used by
+ * the VMware SVGA device, based on the svgautil library.
+ */
+
+#include "svga_winsys.h"
+#include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
+#include "svga_cmd.h"
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * surface_to_surfaceid --
+ *
+ * Utility function for surface ids.
+ * Can handle null surface. Does a surface_reallocation so you need
+ * to have allocated the fifo space before converting.
+ *
+ * Results:
+ * id is filld out.
+ *
+ * Side effects:
+ * One surface relocation is preformed for texture handle.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE
+void surface_to_surfaceid(struct svga_winsys_context *swc, // IN
+ struct pipe_surface *surface, // IN
+ SVGA3dSurfaceImageId *id, // OUT
+ unsigned flags) // IN
+{
+ if(surface) {
+ struct svga_surface *s = svga_surface(surface);
+ swc->surface_relocation(swc, &id->sid, s->handle, flags);
+ id->face = s->real_face; /* faces have the same order */
+ id->mipmap = s->real_level;
+ }
+ else {
+ id->sid = SVGA3D_INVALID_ID;
+ id->face = 0;
+ id->mipmap = 0;
+ }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_FIFOReserve --
+ *
+ * Reserve space for an SVGA3D FIFO command.
+ *
+ * The 2D SVGA commands have been around for a while, so they
+ * have a rather asymmetric structure. The SVGA3D protocol is
+ * more uniform: each command begins with a header containing the
+ * command number and the full size.
+ *
+ * This is a convenience wrapper around SVGA_FIFOReserve. We
+ * reserve space for the whole command, and write the header.
+ *
+ * This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * Results:
+ * Returns a pointer to the space reserved for command-specific
+ * data. It must be 'cmdSize' bytes long.
+ *
+ * Side effects:
+ * Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc,
+ uint32 cmd, // IN
+ uint32 cmdSize, // IN
+ uint32 nr_relocs) // IN
+{
+ SVGA3dCmdHeader *header;
+
+ header = swc->reserve(swc, sizeof *header + cmdSize, nr_relocs);
+ if(!header)
+ return NULL;
+
+ header->id = cmd;
+ header->size = cmdSize;
+
+ return &header[1];
+}
+
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc)
+{
+ swc->commit(swc);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineContext --
+ *
+ * Create a new context, to be referred to with the provided ID.
+ *
+ * Context objects encapsulate all render state, and shader
+ * objects are per-context.
+ *
+ * Surfaces are not per-context. The same surface can be shared
+ * between multiple contexts, and surface operations can occur
+ * without a context.
+ *
+ * If the provided context ID already existed, it is redefined.
+ *
+ * Context IDs are arbitrary small non-negative integers,
+ * global to the entire SVGA device.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc) // IN
+{
+ SVGA3dCmdDefineContext *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_CONTEXT_DEFINE, sizeof *cmd, 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyContext --
+ *
+ * Delete a context created with SVGA3D_DefineContext.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc) // IN
+{
+ SVGA3dCmdDestroyContext *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_CONTEXT_DESTROY, sizeof *cmd, 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDefineSurface --
+ *
+ * Begin a SURFACE_DEFINE command. This reserves space for it in
+ * the FIFO, and returns pointers to the command's faces and
+ * mipsizes arrays.
+ *
+ * This function must be paired with SVGA_FIFOCommitAll().
+ * The faces and mipSizes arrays are initialized to zero.
+ *
+ * This creates a "surface" object in the SVGA3D device,
+ * with the provided surface ID (sid). Surfaces are generic
+ * containers for host VRAM objects like textures, vertex
+ * buffers, and depth/stencil buffers.
+ *
+ * Surfaces are hierarchial:
+ *
+ * - Surface may have multiple faces (for cube maps)
+ *
+ * - Each face has a list of mipmap levels
+ *
+ * - Each mipmap image may have multiple volume
+ * slices, if the image is three dimensional.
+ *
+ * - Each slice is a 2D array of 'blocks'
+ *
+ * - Each block may be one or more pixels.
+ * (Usually 1, more for DXT or YUV formats.)
+ *
+ * Surfaces are generic host VRAM objects. The SVGA3D device
+ * may optimize surfaces according to the format they were
+ * created with, but this format does not limit the ways in
+ * which the surface may be used. For example, a depth surface
+ * can be used as a texture, or a floating point image may
+ * be used as a vertex buffer. Some surface usages may be
+ * lower performance, due to software emulation, but any
+ * usage should work with any surface.
+ *
+ * If 'sid' is already defined, the old surface is deleted
+ * and this new surface replaces it.
+ *
+ * Surface IDs are arbitrary small non-negative integers,
+ * global to the entire SVGA device.
+ *
+ * Results:
+ * Returns pointers to arrays allocated in the FIFO for 'faces'
+ * and 'mipSizes'.
+ *
+ * Side effects:
+ * Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *sid, // IN
+ SVGA3dSurfaceFlags flags, // IN
+ SVGA3dSurfaceFormat format, // IN
+ SVGA3dSurfaceFace **faces, // OUT
+ SVGA3dSize **mipSizes, // OUT
+ uint32 numMipSizes) // IN
+{
+ SVGA3dCmdDefineSurface *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_DEFINE, sizeof *cmd +
+ sizeof **mipSizes * numMipSizes, 1);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_WRITE);
+ cmd->surfaceFlags = flags;
+ cmd->format = format;
+
+ *faces = &cmd->face[0];
+ *mipSizes = (SVGA3dSize*) &cmd[1];
+
+ memset(*faces, 0, sizeof **faces * SVGA3D_MAX_SURFACE_FACES);
+ memset(*mipSizes, 0, sizeof **mipSizes * numMipSizes);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineSurface2D --
+ *
+ * This is a simplified version of SVGA3D_BeginDefineSurface(),
+ * which does not support cube maps, mipmaps, or volume textures.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc, // IN
+ struct svga_winsys_surface *sid, // IN
+ uint32 width, // IN
+ uint32 height, // IN
+ SVGA3dSurfaceFormat format) // IN
+{
+ SVGA3dSize *mipSizes;
+ SVGA3dSurfaceFace *faces;
+ enum pipe_error ret;
+
+ ret = SVGA3D_BeginDefineSurface(swc,
+ sid, 0, format, &faces, &mipSizes, 1);
+ if(ret != PIPE_OK)
+ return ret;
+
+ faces[0].numMipLevels = 1;
+
+ mipSizes[0].width = width;
+ mipSizes[0].height = height;
+ mipSizes[0].depth = 1;
+
+ swc->commit(swc);;
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroySurface --
+ *
+ * Release the host VRAM encapsulated by a particular surface ID.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *sid) // IN
+{
+ SVGA3dCmdDestroySurface *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_DESTROY, sizeof *cmd, 1);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_READ);
+ swc->commit(swc);;
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceDMA--
+ *
+ * Begin a SURFACE_DMA command. This reserves space for it in
+ * the FIFO, and returns a pointer to the command's box array.
+ * This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * When the SVGA3D device asynchronously processes this FIFO
+ * command, a DMA operation is performed between host VRAM and
+ * a generic SVGAGuestPtr. The guest pointer may refer to guest
+ * VRAM (provided by the SVGA PCI device) or to guest system
+ * memory that has been set up as a Guest Memory Region (GMR)
+ * by the SVGA device.
+ *
+ * The guest's DMA buffer must remain valid (not freed, paged out,
+ * or overwritten) until the host has finished processing this
+ * command. The guest can determine that the host has finished
+ * by using the SVGA device's FIFO Fence mechanism.
+ *
+ * The guest's image buffer can be an arbitrary size and shape.
+ * Guest image data is interpreted according to the SVGA3D surface
+ * format specified when the surface was defined.
+ *
+ * The caller may optionally define the guest image's pitch.
+ * guestImage->pitch can either be zero (assume image is tightly
+ * packed) or it must be the number of bytes between vertically
+ * adjacent image blocks.
+ *
+ * The provided copybox list specifies which regions of the source
+ * image are to be copied, and where they appear on the destination.
+ *
+ * NOTE: srcx/srcy are always on the guest image and x/y are
+ * always on the host image, regardless of the actual transfer
+ * direction!
+ *
+ * For efficiency, the SVGA3D device is free to copy more data
+ * than specified. For example, it may round copy boxes outwards
+ * such that they lie on particular alignment boundaries.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+ struct svga_transfer *st, // IN
+ SVGA3dTransferType transfer, // IN
+ const SVGA3dCopyBox *boxes, // IN
+ uint32 numBoxes) // IN
+{
+ struct svga_texture *texture = svga_texture(st->base.texture);
+ SVGA3dCmdSurfaceDMA *cmd;
+ SVGA3dCmdSurfaceDMASuffix *pSuffix;
+ uint32 boxesSize = sizeof *boxes * numBoxes;
+ unsigned region_flags;
+ unsigned surface_flags;
+
+ if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ }
+ else if(transfer == SVGA3D_READ_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ }
+ else {
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_DMA,
+ sizeof *cmd + boxesSize + sizeof *pSuffix,
+ 2);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->region_relocation(swc, &cmd->guest.ptr, st->hwbuf, 0, region_flags);
+ cmd->guest.pitch = st->base.stride;
+
+ swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags);
+ cmd->host.face = st->base.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+ cmd->host.mipmap = st->base.level;
+
+ cmd->transfer = transfer;
+
+ memcpy(&cmd[1], boxes, boxesSize);
+
+ pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize);
+ pSuffix->suffixSize = sizeof *pSuffix;
+ pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride;
+ memset(&pSuffix->flags, 0, sizeof pSuffix->flags);
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+ struct svga_winsys_buffer *guest,
+ struct svga_winsys_surface *host,
+ SVGA3dTransferType transfer, // IN
+ uint32 size, // IN
+ uint32 offset, // IN
+ SVGA3dSurfaceDMAFlags flags) // IN
+{
+ SVGA3dCmdSurfaceDMA *cmd;
+ SVGA3dCopyBox *box;
+ SVGA3dCmdSurfaceDMASuffix *pSuffix;
+ unsigned region_flags;
+ unsigned surface_flags;
+
+ if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ }
+ else if(transfer == SVGA3D_READ_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ }
+ else {
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_DMA,
+ sizeof *cmd + sizeof *box + sizeof *pSuffix,
+ 2);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
+ cmd->guest.pitch = 0;
+
+ swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+ cmd->host.face = 0;
+ cmd->host.mipmap = 0;
+
+ cmd->transfer = transfer;
+
+ box = (SVGA3dCopyBox *)&cmd[1];
+ box->x = offset;
+ box->y = 0;
+ box->z = 0;
+ box->w = size;
+ box->h = 1;
+ box->d = 1;
+ box->srcx = offset;
+ box->srcy = 0;
+ box->srcz = 0;
+
+ pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + sizeof *box);
+ pSuffix->suffixSize = sizeof *pSuffix;
+ pSuffix->maximumOffset = offset + size;
+ pSuffix->flags = flags;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetRenderTarget --
+ *
+ * Bind a surface object to a particular render target attachment
+ * point on the current context. Render target attachment points
+ * exist for color buffers, a depth buffer, and a stencil buffer.
+ *
+ * The SVGA3D device is quite lenient about the types of surfaces
+ * that may be used as render targets. The color buffers must
+ * all be the same size, but the depth and stencil buffers do not
+ * have to be the same size as the color buffer. All attachments
+ * are optional.
+ *
+ * Some combinations of render target formats may require software
+ * emulation, depending on the capabilities of the host graphics
+ * API and graphics hardware.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+ SVGA3dRenderTargetType type, // IN
+ struct pipe_surface *surface) // IN
+{
+ SVGA3dCmdSetRenderTarget *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETRENDERTARGET, sizeof *cmd, 1);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+
+ cmd->cid = swc->cid;
+
+ cmd->type = type;
+
+ surface_to_surfaceid(swc, surface, &cmd->target, PIPE_BUFFER_USAGE_GPU_WRITE);
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineShader --
+ *
+ * Upload the bytecode for a new shader. The bytecode is "SVGA3D
+ * format", which is theoretically a binary-compatible superset
+ * of Microsoft's DirectX shader bytecode. In practice, the
+ * SVGA3D bytecode doesn't yet have any extensions to DirectX's
+ * bytecode format.
+ *
+ * The SVGA3D device supports shader models 1.1 through 2.0.
+ *
+ * The caller chooses a shader ID (small positive integer) by
+ * which this shader will be identified in future commands. This
+ * ID is in a namespace which is per-context and per-shader-type.
+ *
+ * 'bytecodeLen' is specified in bytes. It must be a multiple of 4.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+ uint32 shid, // IN
+ SVGA3dShaderType type, // IN
+ const uint32 *bytecode, // IN
+ uint32 bytecodeLen) // IN
+{
+ SVGA3dCmdDefineShader *cmd;
+
+ assert(bytecodeLen % 4 == 0);
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SHADER_DEFINE, sizeof *cmd + bytecodeLen,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->shid = shid;
+ cmd->type = type;
+ memcpy(&cmd[1], bytecode, bytecodeLen);
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyShader --
+ *
+ * Delete a shader that was created by SVGA3D_DefineShader. If
+ * the shader was the current vertex or pixel shader for its
+ * context, rendering results are undefined until a new shader is
+ * bound.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+ uint32 shid, // IN
+ SVGA3dShaderType type) // IN
+{
+ SVGA3dCmdDestroyShader *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SHADER_DESTROY, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->shid = shid;
+ cmd->type = type;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShaderConst --
+ *
+ * Set the value of a shader constant.
+ *
+ * Shader constants are analogous to uniform variables in GLSL,
+ * except that they belong to the render context rather than to
+ * an individual shader.
+ *
+ * Constants may have one of three types: A 4-vector of floats,
+ * a 4-vector of integers, or a single boolean flag.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+ uint32 reg, // IN
+ SVGA3dShaderType type, // IN
+ SVGA3dShaderConstType ctype, // IN
+ const void *value) // IN
+{
+ SVGA3dCmdSetShaderConst *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SET_SHADER_CONST, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->reg = reg;
+ cmd->type = type;
+ cmd->ctype = ctype;
+
+ switch (ctype) {
+
+ case SVGA3D_CONST_TYPE_FLOAT:
+ case SVGA3D_CONST_TYPE_INT:
+ memcpy(&cmd->values, value, sizeof cmd->values);
+ break;
+
+ case SVGA3D_CONST_TYPE_BOOL:
+ memset(&cmd->values, 0, sizeof cmd->values);
+ cmd->values[0] = *(uint32*)value;
+ break;
+
+ default:
+ assert(0);
+ break;
+
+ }
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShader --
+ *
+ * Switch active shaders. This binds a new vertex or pixel shader
+ * to the specified context.
+ *
+ * A shader ID of SVGA3D_INVALID_ID unbinds any shader, switching
+ * back to the fixed function vertex or pixel pipeline.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+ SVGA3dShaderType type, // IN
+ uint32 shid) // IN
+{
+ SVGA3dCmdSetShader *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->type = type;
+ cmd->shid = shid;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginClear --
+ *
+ * Begin a CLEAR command. This reserves space for it in the FIFO,
+ * and returns a pointer to the command's rectangle array. This
+ * function must be paired with SVGA_FIFOCommitAll().
+ *
+ * Clear is a rendering operation which fills a list of
+ * rectangles with constant values on all render target types
+ * indicated by 'flags'.
+ *
+ * Clear is not affected by clipping, depth test, or other
+ * render state which affects the fragment pipeline.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+ SVGA3dClearFlag flags, // IN
+ uint32 color, // IN
+ float depth, // IN
+ uint32 stencil, // IN
+ SVGA3dRect **rects, // OUT
+ uint32 numRects) // IN
+{
+ SVGA3dCmdClear *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_CLEAR,
+ sizeof *cmd + sizeof **rects * numRects,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->clearFlag = flags;
+ cmd->color = color;
+ cmd->depth = depth;
+ cmd->stencil = stencil;
+ *rects = (SVGA3dRect*) &cmd[1];
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_ClearRect --
+ *
+ * This is a simplified version of SVGA3D_BeginClear().
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+ SVGA3dClearFlag flags, // IN
+ uint32 color, // IN
+ float depth, // IN
+ uint32 stencil, // IN
+ uint32 x, // IN
+ uint32 y, // IN
+ uint32 w, // IN
+ uint32 h) // IN
+{
+ SVGA3dRect *rect;
+ enum pipe_error ret;
+
+ ret = SVGA3D_BeginClear(swc, flags, color, depth, stencil, &rect, 1);
+ if(ret != PIPE_OK)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ memset(rect, 0, sizeof *rect);
+ rect->x = x;
+ rect->y = y;
+ rect->w = w;
+ rect->h = h;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDrawPrimitives --
+ *
+ * Begin a DRAW_PRIMITIVES command. This reserves space for it in
+ * the FIFO, and returns a pointer to the command's arrays.
+ * This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * Drawing commands consist of two variable-length arrays:
+ * SVGA3dVertexDecl elements declare a set of vertex buffers to
+ * use while rendering, and SVGA3dPrimitiveRange elements specify
+ * groups of primitives each with an optional index buffer.
+ *
+ * The decls and ranges arrays are initialized to zero.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+ SVGA3dVertexDecl **decls, // OUT
+ uint32 numVertexDecls, // IN
+ SVGA3dPrimitiveRange **ranges, // OUT
+ uint32 numRanges) // IN
+{
+ SVGA3dCmdDrawPrimitives *cmd;
+ SVGA3dVertexDecl *declArray;
+ SVGA3dPrimitiveRange *rangeArray;
+ uint32 declSize = sizeof **decls * numVertexDecls;
+ uint32 rangeSize = sizeof **ranges * numRanges;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_DRAW_PRIMITIVES,
+ sizeof *cmd + declSize + rangeSize,
+ numVertexDecls + numRanges);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->numVertexDecls = numVertexDecls;
+ cmd->numRanges = numRanges;
+
+ declArray = (SVGA3dVertexDecl*) &cmd[1];
+ rangeArray = (SVGA3dPrimitiveRange*) &declArray[numVertexDecls];
+
+ memset(declArray, 0, declSize);
+ memset(rangeArray, 0, rangeSize);
+
+ *decls = declArray;
+ *ranges = rangeArray;
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceCopy --
+ *
+ * Begin a SURFACE_COPY command. This reserves space for it in
+ * the FIFO, and returns a pointer to the command's arrays. This
+ * function must be paired with SVGA_FIFOCommitAll().
+ *
+ * The box array is initialized with zeroes.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Asynchronously copies a list of boxes from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+ struct pipe_surface *src, // IN
+ struct pipe_surface *dest, // IN
+ SVGA3dCopyBox **boxes, // OUT
+ uint32 numBoxes) // IN
+{
+ SVGA3dCmdSurfaceCopy *cmd;
+ uint32 boxesSize = sizeof **boxes * numBoxes;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_COPY, sizeof *cmd + boxesSize,
+ 2);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ);
+ surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+ *boxes = (SVGA3dCopyBox*) &cmd[1];
+
+ memset(*boxes, 0, boxesSize);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SurfaceStretchBlt --
+ *
+ * Issue a SURFACE_STRETCHBLT command: an asynchronous
+ * surface-to-surface blit, with scaling.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Asynchronously copies one box from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+ struct pipe_surface *src, // IN
+ struct pipe_surface *dest, // IN
+ SVGA3dBox *boxSrc, // IN
+ SVGA3dBox *boxDest, // IN
+ SVGA3dStretchBltMode mode) // IN
+{
+ SVGA3dCmdSurfaceStretchBlt *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_STRETCHBLT, sizeof *cmd,
+ 2);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ);
+ surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+ cmd->boxSrc = *boxSrc;
+ cmd->boxDest = *boxDest;
+ cmd->mode = mode;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetViewport --
+ *
+ * Set the current context's viewport rectangle. The viewport
+ * is clipped to the dimensions of the current render target,
+ * then all rendering is clipped to the viewport.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+ SVGA3dRect *rect) // IN
+{
+ SVGA3dCmdSetViewport *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETVIEWPORT, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->rect = *rect;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetScissorRect --
+ *
+ * Set the current context's scissor rectangle. If scissor
+ * is enabled then all rendering is clipped to the scissor.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+ SVGA3dRect *rect) // IN
+{
+ SVGA3dCmdSetScissorRect *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETSCISSORRECT, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->rect = *rect;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetClipPlane --
+ *
+ * Set one of the current context's clip planes. If the clip
+ * plane is enabled then all 3d rendering is clipped to against
+ * the plane.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+ uint32 index, const float *plane)
+{
+ SVGA3dCmdSetClipPlane *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETCLIPPLANE, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->index = index;
+ cmd->plane[0] = plane[0];
+ cmd->plane[1] = plane[1];
+ cmd->plane[2] = plane[2];
+ cmd->plane[3] = plane[3];
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetZRange --
+ *
+ * Set the range of the depth buffer to use. 'min' and 'max'
+ * are values between 0.0 and 1.0.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+ float zMin, // IN
+ float zMax) // IN
+{
+ SVGA3dCmdSetZRange *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETZRANGE, sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->zRange.min = zMin;
+ cmd->zRange.max = zMax;
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetTextureState --
+ *
+ * Begin a SETTEXTURESTATE command. This reserves space for it in
+ * the FIFO, and returns a pointer to the command's texture state
+ * array. This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * This command sets rendering state which is per-texture-unit.
+ *
+ * XXX: Individual texture states need documentation. However,
+ * they are very similar to the texture states defined by
+ * Direct3D. The D3D documentation is a good starting point
+ * for understanding SVGA3D texture states.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+ SVGA3dTextureState **states, // OUT
+ uint32 numStates) // IN
+{
+ SVGA3dCmdSetTextureState *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETTEXTURESTATE,
+ sizeof *cmd + sizeof **states * numStates,
+ numStates);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ *states = (SVGA3dTextureState*) &cmd[1];
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetRenderState --
+ *
+ * Begin a SETRENDERSTATE command. This reserves space for it in
+ * the FIFO, and returns a pointer to the command's texture state
+ * array. This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * This command sets rendering state which is global to the context.
+ *
+ * XXX: Individual render states need documentation. However,
+ * they are very similar to the render states defined by
+ * Direct3D. The D3D documentation is a good starting point
+ * for understanding SVGA3D render states.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+ SVGA3dRenderState **states, // OUT
+ uint32 numStates) // IN
+{
+ SVGA3dCmdSetRenderState *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SETRENDERSTATE,
+ sizeof *cmd + sizeof **states * numStates,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ *states = (SVGA3dRenderState*) &cmd[1];
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginQuery--
+ *
+ * Issues a SVGA_3D_CMD_BEGIN_QUERY command.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type) // IN
+{
+ SVGA3dCmdBeginQuery *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_BEGIN_QUERY,
+ sizeof *cmd,
+ 0);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->type = type;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_EndQuery--
+ *
+ * Issues a SVGA_3D_CMD_END_QUERY command.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type, // IN
+ struct svga_winsys_buffer *buffer) // IN/OUT
+{
+ SVGA3dCmdEndQuery *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_END_QUERY,
+ sizeof *cmd,
+ 1);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->type = type;
+
+ swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_WaitForQuery--
+ *
+ * Issues a SVGA_3D_CMD_WAIT_FOR_QUERY command. This reserves space
+ * for it in the FIFO. This doesn't actually wait for the query to
+ * finish but instead tells the host to start a wait at the driver
+ * level. The caller can wait on the status variable in the
+ * guestPtr memory or send an insert fence instruction after this
+ * command and wait on the fence.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type, // IN
+ struct svga_winsys_buffer *buffer) // IN/OUT
+{
+ SVGA3dCmdWaitForQuery *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_WAIT_FOR_QUERY,
+ sizeof *cmd,
+ 1);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->cid = swc->cid;
+ cmd->type = type;
+
+ swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
new file mode 100644
index 00000000000..80410547690
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -0,0 +1,235 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_cmd.h --
+ *
+ * Command construction utility for the SVGA3D protocol used by
+ * the VMware SVGA device, based on the svgautil library.
+ */
+
+#ifndef __SVGA3D_H__
+#define __SVGA3D_H__
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_defines.h"
+
+
+struct pipe_buffer;
+struct pipe_surface;
+struct svga_transfer;
+struct svga_winsys_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+/*
+ * SVGA Device Interoperability
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc, uint32 cmd, uint32 cmdSize, uint32 nr_relocs);
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc);
+
+
+/*
+ * Context Management
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc);
+
+
+/*
+ * Surface Management
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *sid,
+ SVGA3dSurfaceFlags flags,
+ SVGA3dSurfaceFormat format,
+ SVGA3dSurfaceFace **faces,
+ SVGA3dSize **mipSizes,
+ uint32 numMipSizes);
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *sid,
+ uint32 width,
+ uint32 height,
+ SVGA3dSurfaceFormat format);
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *sid);
+
+
+/*
+ * Surface Operations
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+ struct svga_transfer *st,
+ SVGA3dTransferType transfer,
+ const SVGA3dCopyBox *boxes,
+ uint32 numBoxes);
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+ struct svga_winsys_buffer *guest,
+ struct svga_winsys_surface *host,
+ SVGA3dTransferType transfer,
+ uint32 size,
+ uint32 offset,
+ SVGA3dSurfaceDMAFlags flags);
+
+/*
+ * Drawing Operations
+ */
+
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+ SVGA3dClearFlag flags,
+ uint32 color, float depth, uint32 stencil,
+ SVGA3dRect **rects, uint32 numRects);
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+ SVGA3dClearFlag flags, uint32 color, float depth,
+ uint32 stencil, uint32 x, uint32 y, uint32 w, uint32 h);
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+ SVGA3dVertexDecl **decls,
+ uint32 numVertexDecls,
+ SVGA3dPrimitiveRange **ranges,
+ uint32 numRanges);
+
+/*
+ * Blits
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+ struct pipe_surface *src,
+ struct pipe_surface *dest,
+ SVGA3dCopyBox **boxes, uint32 numBoxes);
+
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+ struct pipe_surface *src,
+ struct pipe_surface *dest,
+ SVGA3dBox *boxSrc, SVGA3dBox *boxDest,
+ SVGA3dStretchBltMode mode);
+
+/*
+ * Shared FFP/Shader Render State
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+ SVGA3dRenderTargetType type,
+ struct pipe_surface *surface);
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+ float zMin, float zMax);
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+ SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+ SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+ uint32 index, const float *plane);
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+ SVGA3dTextureState **states,
+ uint32 numStates);
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+ SVGA3dRenderState **states,
+ uint32 numStates);
+
+
+/*
+ * Shaders
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+ uint32 shid, SVGA3dShaderType type,
+ const uint32 *bytecode, uint32 bytecodeLen);
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+ uint32 shid, SVGA3dShaderType type);
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+ uint32 reg, SVGA3dShaderType type,
+ SVGA3dShaderConstType ctype, const void *value);
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+ SVGA3dShaderType type, uint32 shid);
+
+
+/*
+ * Queries
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type);
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type,
+ struct svga_winsys_buffer *buffer);
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+ SVGA3dQueryType type,
+ struct svga_winsys_buffer *buffer);
+
+#endif /* __SVGA3D_H__ */
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
new file mode 100644
index 00000000000..af99c9de37c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -0,0 +1,287 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_swtnl.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+#include "svga_state.h"
+
+
+static void svga_destroy( struct pipe_context *pipe )
+{
+ struct svga_context *svga = svga_context( pipe );
+ unsigned shader;
+
+ svga_cleanup_framebuffer( svga );
+ svga_cleanup_tss_binding( svga );
+
+ svga_hwtnl_destroy( svga->hwtnl );
+
+ svga_cleanup_vertex_state(svga);
+
+ svga->swc->destroy(svga->swc);
+
+ svga_destroy_swtnl( svga );
+
+ u_upload_destroy( svga->upload_vb );
+ u_upload_destroy( svga->upload_ib );
+
+ util_bitmask_destroy( svga->vs_bm );
+ util_bitmask_destroy( svga->fs_bm );
+
+ for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
+ pipe_buffer_reference( &svga->curr.cb[shader], NULL );
+
+ FREE( svga );
+}
+
+static unsigned int
+svga_is_texture_referenced( struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level)
+{
+ struct svga_texture *tex = svga_texture(texture);
+ struct svga_screen *ss = svga_screen(pipe->screen);
+
+ /**
+ * The screen does not cache texture writes.
+ */
+
+ if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle))
+ return PIPE_UNREFERENCED;
+
+ /**
+ * sws->surface_is_flushed() does not distinguish between read references
+ * and write references. So assume a reference is both.
+ */
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static unsigned int
+svga_is_buffer_referenced( struct pipe_context *pipe,
+ struct pipe_buffer *buf)
+
+{
+ struct svga_screen *ss = svga_screen(pipe->screen);
+ struct svga_buffer *sbuf = svga_buffer(buf);
+
+ /**
+ * XXX: Check this.
+ * The screen may cache buffer writes, but when we map, we map out
+ * of those cached writes, so we don't need to set a
+ * PIPE_REFERENCED_FOR_WRITE flag for cached buffers.
+ */
+
+ if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle))
+ return PIPE_UNREFERENCED;
+
+ /**
+ * sws->surface_is_flushed() does not distinguish between read references
+ * and write references. So assume a reference is both,
+ * however, we make an exception for index- and vertex buffers, to avoid
+ * a flush in st_bufferobj_get_subdata, during display list replay.
+ */
+
+ if (sbuf->base.usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX))
+ return PIPE_REFERENCED_FOR_READ;
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+
+struct pipe_context *svga_context_create( struct pipe_screen *screen )
+{
+ struct svga_screen *svgascreen = svga_screen(screen);
+ struct svga_context *svga = NULL;
+ enum pipe_error ret;
+
+ svga = CALLOC_STRUCT(svga_context);
+ if (svga == NULL)
+ goto no_svga;
+
+ svga->pipe.winsys = screen->winsys;
+ svga->pipe.screen = screen;
+ svga->pipe.destroy = svga_destroy;
+ svga->pipe.clear = svga_clear;
+
+ svga->pipe.is_texture_referenced = svga_is_texture_referenced;
+ svga->pipe.is_buffer_referenced = svga_is_buffer_referenced;
+
+ svga->swc = svgascreen->sws->context_create(svgascreen->sws);
+ if(!svga->swc)
+ goto no_swc;
+
+ svga_init_blend_functions(svga);
+ svga_init_blit_functions(svga);
+ svga_init_depth_stencil_functions(svga);
+ svga_init_draw_functions(svga);
+ svga_init_flush_functions(svga);
+ svga_init_misc_functions(svga);
+ svga_init_rasterizer_functions(svga);
+ svga_init_sampler_functions(svga);
+ svga_init_fs_functions(svga);
+ svga_init_vs_functions(svga);
+ svga_init_vertex_functions(svga);
+ svga_init_constbuffer_functions(svga);
+ svga_init_query_functions(svga);
+
+ /* debug */
+ svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
+ svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
+ svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE);
+ svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
+
+ if (!svga_init_swtnl(svga))
+ goto no_swtnl;
+
+ svga->fs_bm = util_bitmask_create();
+ if (svga->fs_bm == NULL)
+ goto no_fs_bm;
+
+ svga->vs_bm = util_bitmask_create();
+ if (svga->vs_bm == NULL)
+ goto no_vs_bm;
+
+ svga->upload_ib = u_upload_create( svga->pipe.screen,
+ 32 * 1024,
+ 16,
+ PIPE_BUFFER_USAGE_INDEX );
+ if (svga->upload_ib == NULL)
+ goto no_upload_ib;
+
+ svga->upload_vb = u_upload_create( svga->pipe.screen,
+ 128 * 1024,
+ 16,
+ PIPE_BUFFER_USAGE_VERTEX );
+ if (svga->upload_vb == NULL)
+ goto no_upload_vb;
+
+ svga->hwtnl = svga_hwtnl_create( svga,
+ svga->upload_ib,
+ svga->swc );
+ if (svga->hwtnl == NULL)
+ goto no_hwtnl;
+
+
+ ret = svga_emit_initial_state( svga );
+ if (ret)
+ goto no_state;
+
+ /* Avoid shortcircuiting state with initial value of zero.
+ */
+ memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
+ memset(&svga->state.hw_clear.framebuffer, 0x0,
+ sizeof(svga->state.hw_clear.framebuffer));
+
+ memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
+ memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+ svga->state.hw_draw.num_views = 0;
+
+ svga->dirty = ~0;
+ svga->state.white_fs_id = SVGA3D_INVALID_ID;
+
+ LIST_INITHEAD(&svga->dirty_buffers);
+
+ return &svga->pipe;
+
+no_state:
+ svga_hwtnl_destroy( svga->hwtnl );
+no_hwtnl:
+ u_upload_destroy( svga->upload_vb );
+no_upload_vb:
+ u_upload_destroy( svga->upload_ib );
+no_upload_ib:
+ util_bitmask_destroy( svga->vs_bm );
+no_vs_bm:
+ util_bitmask_destroy( svga->fs_bm );
+no_fs_bm:
+ svga_destroy_swtnl(svga);
+no_swtnl:
+ svga->swc->destroy(svga->swc);
+no_swc:
+ FREE(svga);
+no_svga:
+ return NULL;
+}
+
+
+void svga_context_flush( struct svga_context *svga,
+ struct pipe_fence_handle **pfence )
+{
+ struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
+
+ svga->curr.nr_fbs = 0;
+
+ /* Unmap upload manager buffers:
+ */
+ u_upload_flush(svga->upload_vb);
+ u_upload_flush(svga->upload_ib);
+
+ /* Flush screen, to ensure that texture dma uploads are processed
+ * before submitting commands.
+ */
+ svga_screen_flush(svgascreen, NULL);
+
+ svga_context_flush_buffers(svga);
+
+ /* Flush pending commands to hardware:
+ */
+ svga->swc->flush(svga->swc, pfence);
+
+ if (SVGA_DEBUG & DEBUG_SYNC) {
+ if (pfence && *pfence)
+ svga->pipe.screen->fence_finish( svga->pipe.screen, *pfence, 0);
+ }
+}
+
+
+void svga_hwtnl_flush_retry( struct svga_context *svga )
+{
+ enum pipe_error ret = PIPE_OK;
+
+ ret = svga_hwtnl_flush( svga->hwtnl );
+ if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+ svga_context_flush( svga, NULL );
+ ret = svga_hwtnl_flush( svga->hwtnl );
+ }
+
+ assert(ret == 0);
+}
+
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
new file mode 100644
index 00000000000..66259fd0103
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -0,0 +1,446 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_CONTEXT_H
+#define SVGA_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "tgsi/tgsi_scan.h"
+
+
+#define SVGA_TEX_UNITS 8
+
+struct draw_vertex_shader;
+struct svga_shader_result;
+struct SVGACmdMemory;
+struct util_bitmask;
+struct u_upload_mgr;
+
+
+struct svga_shader
+{
+ const struct tgsi_token *tokens;
+
+ struct tgsi_shader_info info;
+
+ struct svga_shader_result *results;
+
+ unsigned id;
+
+ boolean use_sm30;
+};
+
+struct svga_fragment_shader
+{
+ struct svga_shader base;
+};
+
+struct svga_vertex_shader
+{
+ struct svga_shader base;
+
+ struct draw_vertex_shader *draw_shader;
+};
+
+
+struct svga_cache_context;
+struct svga_tracked_state;
+
+struct svga_blend_state {
+
+ boolean need_white_fragments;
+
+ /* Should be per-render-target:
+ */
+ struct {
+ uint8_t writemask;
+
+ boolean blend_enable;
+ uint8_t srcblend;
+ uint8_t dstblend;
+ uint8_t blendeq;
+
+ boolean separate_alpha_blend_enable;
+ uint8_t srcblend_alpha;
+ uint8_t dstblend_alpha;
+ uint8_t blendeq_alpha;
+
+ } rt[1];
+};
+
+struct svga_depth_stencil_state {
+ unsigned zfunc:8;
+ unsigned zenable:1;
+ unsigned zwriteenable:1;
+
+ unsigned alphatestenable:1;
+ unsigned alphafunc:8;
+
+ struct {
+ unsigned enabled:1;
+ unsigned func:8;
+ unsigned fail:8;
+ unsigned zfail:8;
+ unsigned pass:8;
+ } stencil[2];
+
+ /* SVGA3D has one ref/mask/writemask triple shared between front &
+ * back face stencil. We really need two:
+ */
+ unsigned stencil_ref:8;
+ unsigned stencil_mask:8;
+ unsigned stencil_writemask:8;
+
+ float alpharef;
+};
+
+#define SVGA_UNFILLED_DISABLE 0
+#define SVGA_UNFILLED_LINE 1
+#define SVGA_UNFILLED_POINT 2
+
+#define SVGA_PIPELINE_FLAG_POINTS (1<<PIPE_PRIM_POINTS)
+#define SVGA_PIPELINE_FLAG_LINES (1<<PIPE_PRIM_LINES)
+#define SVGA_PIPELINE_FLAG_TRIS (1<<PIPE_PRIM_TRIANGLES)
+
+struct svga_rasterizer_state {
+ struct pipe_rasterizer_state templ; /* needed for draw module */
+
+ unsigned shademode:8;
+ unsigned cullmode:8;
+ unsigned scissortestenable:1;
+ unsigned multisampleantialias:1;
+ unsigned antialiasedlineenable:1;
+ unsigned lastpixel:1;
+
+ unsigned linepattern;
+
+ float slopescaledepthbias;
+ float depthbias;
+ float pointsize;
+ float pointsize_min;
+ float pointsize_max;
+
+ unsigned hw_unfilled:16; /* PIPE_POLYGON_MODE_x */
+ unsigned need_pipeline:16; /* which prims do we need help for? */
+};
+
+struct svga_sampler_state {
+ unsigned mipfilter;
+ unsigned magfilter;
+ unsigned minfilter;
+ unsigned aniso_level;
+ float lod_bias;
+ unsigned addressu;
+ unsigned addressv;
+ unsigned addressw;
+ unsigned bordercolor;
+ unsigned normalized_coords:1;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+
+ unsigned min_lod;
+ unsigned view_min_lod;
+ unsigned view_max_lod;
+};
+
+/* Use to calculate differences between state emitted to hardware and
+ * current driver-calculated state.
+ */
+struct svga_state
+{
+ const struct svga_blend_state *blend;
+ const struct svga_depth_stencil_state *depth;
+ const struct svga_rasterizer_state *rast;
+ const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+
+ struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+ struct svga_fragment_shader *fs;
+ struct svga_vertex_shader *vs;
+
+ struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+ struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+ struct pipe_buffer *cb[PIPE_SHADER_TYPES];
+
+ struct pipe_framebuffer_state framebuffer;
+ float depthscale;
+
+ /* Hack to limit the number of different render targets between
+ * flushes. Helps avoid blowing out our surface cache in EXA.
+ */
+ int nr_fbs;
+
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_blend_color blend_color;
+ struct pipe_clip_state clip;
+ struct pipe_viewport_state viewport;
+
+ unsigned num_samplers;
+ unsigned num_textures;
+ unsigned num_vertex_elements;
+ unsigned num_vertex_buffers;
+ unsigned reduced_prim;
+
+ struct {
+ unsigned flag_1d;
+ unsigned flag_srgb;
+ } tex_flags;
+
+ boolean any_user_vertex_buffers;
+
+ unsigned zero_stride_vertex_elements;
+ unsigned num_zero_stride_vertex_elements;
+ /* ### maybe dynamically allocate this */
+ float zero_stride_constants[PIPE_MAX_ATTRIBS*4];
+};
+
+#define RS_MAX 97
+#define TS_MAX 30
+#define CB_MAX 256
+
+struct svga_prescale {
+ float translate[4];
+ float scale[4];
+ boolean enabled;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT )
+ */
+struct svga_hw_clear_state
+{
+ struct {
+ unsigned x,y,w,h;
+ } viewport;
+
+ struct {
+ float zmin, zmax;
+ } depthrange;
+
+ struct pipe_framebuffer_state framebuffer;
+ struct svga_prescale prescale;
+};
+
+struct svga_hw_view_state
+{
+ struct pipe_texture *texture;
+ struct svga_sampler_view *v;
+ unsigned min_lod;
+ unsigned max_lod;
+ int dirty;
+};
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW )
+ */
+struct svga_hw_draw_state
+{
+ unsigned rs[RS_MAX];
+ unsigned ts[16][TS_MAX];
+ float cb[PIPE_SHADER_TYPES][CB_MAX][4];
+
+ struct svga_shader_result *fs;
+ struct svga_shader_result *vs;
+ struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
+
+ unsigned num_views;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_NEED_SWTNL )
+ */
+struct svga_sw_state
+{
+ unsigned ve_format[PIPE_MAX_ATTRIBS]; /* NEW_VELEMENT */
+
+ /* which parts we need */
+ boolean need_swvfetch;
+ boolean need_pipeline;
+ boolean need_swtnl;
+};
+
+
+/* Queue some state updates (like rss) and submit them to hardware in
+ * a single packet.
+ */
+struct svga_hw_queue;
+
+struct svga_query;
+
+struct svga_context
+{
+ struct pipe_context pipe;
+ struct svga_winsys_context *swc;
+
+ struct {
+ boolean no_swtnl;
+ boolean force_swtnl;
+ boolean use_min_mipmap;
+
+ /* incremented for each shader */
+ unsigned shader_id;
+
+ unsigned disable_shader;
+ } debug;
+
+ struct {
+ struct draw_context *draw;
+ struct vbuf_render *backend;
+ unsigned hw_prim;
+ boolean new_vbuf;
+ boolean new_vdecl;
+ } swtnl;
+
+ /* Bitmask of used shader IDs */
+ struct util_bitmask *fs_bm;
+ struct util_bitmask *vs_bm;
+
+ struct {
+ unsigned dirty[4];
+
+ unsigned texture_timestamp;
+
+ /* Internally generated shaders:
+ */
+ unsigned white_fs_id;
+
+ /*
+ */
+ struct svga_sw_state sw;
+ struct svga_hw_draw_state hw_draw;
+ struct svga_hw_clear_state hw_clear;
+ } state;
+
+ struct svga_state curr; /* state from the state tracker */
+ unsigned dirty; /* statechanges since last update_state() */
+
+ struct u_upload_mgr *upload_ib;
+ struct u_upload_mgr *upload_vb;
+ struct svga_hwtnl *hwtnl;
+
+ /** The occlusion query currently in progress */
+ struct svga_query *sq;
+
+ /** List of buffers with queued transfers */
+ struct list_head dirty_buffers;
+};
+
+/* A flag for each state_tracker state object:
+ */
+#define SVGA_NEW_BLEND 0x1
+#define SVGA_NEW_DEPTH_STENCIL 0x2
+#define SVGA_NEW_RAST 0x4
+#define SVGA_NEW_SAMPLER 0x8
+#define SVGA_NEW_TEXTURE 0x10
+#define SVGA_NEW_VBUFFER 0x20
+#define SVGA_NEW_VELEMENT 0x40
+#define SVGA_NEW_FS 0x80
+#define SVGA_NEW_VS 0x100
+#define SVGA_NEW_FS_CONST_BUFFER 0x200
+#define SVGA_NEW_VS_CONST_BUFFER 0x400
+#define SVGA_NEW_FRAME_BUFFER 0x800
+#define SVGA_NEW_STIPPLE 0x1000
+#define SVGA_NEW_SCISSOR 0x2000
+#define SVGA_NEW_BLEND_COLOR 0x4000
+#define SVGA_NEW_CLIP 0x8000
+#define SVGA_NEW_VIEWPORT 0x10000
+#define SVGA_NEW_PRESCALE 0x20000
+#define SVGA_NEW_REDUCED_PRIMITIVE 0x40000
+#define SVGA_NEW_TEXTURE_BINDING 0x80000
+#define SVGA_NEW_NEED_PIPELINE 0x100000
+#define SVGA_NEW_NEED_SWVFETCH 0x200000
+#define SVGA_NEW_NEED_SWTNL 0x400000
+#define SVGA_NEW_FS_RESULT 0x800000
+#define SVGA_NEW_VS_RESULT 0x1000000
+#define SVGA_NEW_ZERO_STRIDE 0x2000000
+#define SVGA_NEW_TEXTURE_FLAGS 0x4000000
+
+
+
+
+
+/***********************************************************************
+ * svga_clear.c:
+ */
+void svga_clear(struct pipe_context *pipe,
+ unsigned buffers,
+ const float *rgba,
+ double depth,
+ unsigned stencil);
+
+
+/***********************************************************************
+ * svga_screen_texture.c:
+ */
+void svga_mark_surfaces_dirty(struct svga_context *svga);
+
+
+
+
+void svga_init_state_functions( struct svga_context *svga );
+void svga_init_flush_functions( struct svga_context *svga );
+void svga_init_string_functions( struct svga_context *svga );
+void svga_init_blit_functions(struct svga_context *svga);
+
+void svga_init_blend_functions( struct svga_context *svga );
+void svga_init_depth_stencil_functions( struct svga_context *svga );
+void svga_init_misc_functions( struct svga_context *svga );
+void svga_init_rasterizer_functions( struct svga_context *svga );
+void svga_init_sampler_functions( struct svga_context *svga );
+void svga_init_fs_functions( struct svga_context *svga );
+void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_vertex_functions( struct svga_context *svga );
+void svga_init_constbuffer_functions( struct svga_context *svga );
+void svga_init_draw_functions( struct svga_context *svga );
+void svga_init_query_functions( struct svga_context *svga );
+
+void svga_cleanup_vertex_state( struct svga_context *svga );
+void svga_cleanup_tss_binding( struct svga_context *svga );
+void svga_cleanup_framebuffer( struct svga_context *svga );
+
+void svga_context_flush( struct svga_context *svga,
+ struct pipe_fence_handle **pfence );
+
+void svga_hwtnl_flush_retry( struct svga_context *svga );
+
+
+/***********************************************************************
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct svga_context *
+svga_context( struct pipe_context *pipe )
+{
+ return (struct svga_context *)pipe;
+}
+
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
new file mode 100644
index 00000000000..3a3fcd8fae2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -0,0 +1,75 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DEBUG_H
+#define SVGA_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+#define DEBUG_DMA 0x1
+#define DEBUG_TGSI 0x4
+#define DEBUG_PIPE 0x8
+#define DEBUG_STATE 0x10
+#define DEBUG_SCREEN 0x20
+#define DEBUG_TEX 0x40
+#define DEBUG_SWTNL 0x80
+#define DEBUG_CONSTS 0x100
+#define DEBUG_VIEWPORT 0x200
+#define DEBUG_VIEWS 0x400
+#define DEBUG_PERF 0x800 /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH 0x1000 /* flush after every draw */
+#define DEBUG_SYNC 0x2000 /* sync after every flush */
+#define DEBUG_QUERY 0x4000
+#define DEBUG_CACHE 0x8000
+
+#ifdef DEBUG
+extern int SVGA_DEBUG;
+#define DBSTR(x) x
+#else
+#define SVGA_DEBUG 0
+#define DBSTR(x) ""
+#endif
+
+static INLINE void
+SVGA_DBG( unsigned flag, const char *fmt, ... )
+{
+#ifdef DEBUG
+ if (SVGA_DEBUG & flag)
+ {
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+ }
+#else
+ (void)flag;
+ (void)fmt;
+#endif
+}
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
new file mode 100644
index 00000000000..ca73cf9d5a3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -0,0 +1,378 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_debug.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
+#include "svga_winsys.h"
+#include "svga_cmd.h"
+
+
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+ struct u_upload_mgr *upload_ib,
+ struct svga_winsys_context *swc )
+{
+ struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl);
+ if (hwtnl == NULL)
+ goto fail;
+
+ hwtnl->svga = svga;
+ hwtnl->upload_ib = upload_ib;
+
+ hwtnl->cmd.swc = swc;
+
+ return hwtnl;
+
+fail:
+ return NULL;
+}
+
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl )
+{
+ int i, j;
+
+ for (i = 0; i < PIPE_PRIM_MAX; i++) {
+ for (j = 0; j < IDX_CACHE_MAX; j++) {
+ pipe_buffer_reference( &hwtnl->index_cache[i][j].buffer,
+ NULL );
+ }
+ }
+
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
+ pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+
+ for (i = 0; i < hwtnl->cmd.prim_count; i++)
+ pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
+
+
+ FREE(hwtnl);
+}
+
+
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+ boolean flatshade,
+ boolean flatshade_first )
+{
+ hwtnl->hw_pv = PV_FIRST;
+ hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+}
+
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
+ unsigned mode )
+{
+ hwtnl->api_fillmode = mode;
+}
+
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
+ unsigned count )
+{
+ unsigned i;
+
+ assert(hwtnl->cmd.prim_count == 0);
+
+ for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
+ pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i],
+ NULL);
+ }
+
+ hwtnl->cmd.vdecl_count = count;
+}
+
+
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+ unsigned i,
+ const SVGA3dVertexDecl *decl,
+ struct pipe_buffer *vb)
+{
+ assert(hwtnl->cmd.prim_count == 0);
+
+ assert( i < hwtnl->cmd.vdecl_count );
+
+ hwtnl->cmd.vdecl[i] = *decl;
+
+ pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i],
+ vb);
+}
+
+
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
+{
+ struct svga_winsys_context *swc = hwtnl->cmd.swc;
+ struct svga_context *svga = hwtnl->svga;
+ enum pipe_error ret;
+
+ if (hwtnl->cmd.prim_count) {
+ struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+ struct svga_winsys_surface *ib_handle[QSZ];
+ struct svga_winsys_surface *handle;
+ SVGA3dVertexDecl *vdecl;
+ SVGA3dPrimitiveRange *prim;
+ unsigned i;
+
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+ handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+ if (handle == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ vb_handle[i] = handle;
+ }
+
+ for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+ if (hwtnl->cmd.prim_ib[i]) {
+ handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
+ if (handle == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ else
+ handle = NULL;
+
+ ib_handle[i] = handle;
+ }
+
+ SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+ svga->curr.framebuffer.cbufs[0] ?
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
+ hwtnl->cmd.prim_count);
+
+ ret = SVGA3D_BeginDrawPrimitives(swc,
+ &vdecl,
+ hwtnl->cmd.vdecl_count,
+ &prim,
+ hwtnl->cmd.prim_count);
+ if (ret != PIPE_OK)
+ return ret;
+
+
+ memcpy( vdecl,
+ hwtnl->cmd.vdecl,
+ hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+ /* Given rangeHint is considered to be relative to indexBias, and
+ * indexBias varies per primitive, we cannot accurately supply an
+ * rangeHint when emitting more than one primitive per draw command.
+ */
+ if (hwtnl->cmd.prim_count == 1) {
+ vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+ vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+ }
+ else {
+ vdecl[i].rangeHint.first = 0;
+ vdecl[i].rangeHint.last = 0;
+ }
+
+ swc->surface_relocation(swc,
+ &vdecl[i].array.surfaceId,
+ vb_handle[i],
+ PIPE_BUFFER_USAGE_GPU_READ);
+ }
+
+ memcpy( prim,
+ hwtnl->cmd.prim,
+ hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+ for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+ swc->surface_relocation(swc,
+ &prim[i].indexArray.surfaceId,
+ ib_handle[i],
+ PIPE_BUFFER_USAGE_GPU_READ);
+ pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
+ }
+
+ SVGA_FIFOCommitAll( swc );
+ hwtnl->cmd.prim_count = 0;
+ }
+
+ return PIPE_OK;
+}
+
+
+
+
+
+/***********************************************************************
+ * Internal functions:
+ */
+
+enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned min_index,
+ unsigned max_index,
+ struct pipe_buffer *ib )
+{
+ int ret = PIPE_OK;
+
+#ifdef DEBUG
+ {
+ unsigned i;
+ for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+ struct pipe_buffer *vb = hwtnl->cmd.vdecl_vb[i];
+ unsigned size = vb ? vb->size : 0;
+ unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
+ unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
+ unsigned index_bias = range->indexBias;
+ unsigned width;
+
+ assert(vb);
+ assert(size);
+ assert(offset < size);
+ assert(index_bias >= 0);
+ assert(min_index <= max_index);
+ assert(offset + index_bias*stride < size);
+ assert(offset + (index_bias + min_index)*stride < size);
+
+ switch (hwtnl->cmd.vdecl[i].identity.type) {
+ case SVGA3D_DECLTYPE_FLOAT1:
+ width = 4;
+ break;
+ case SVGA3D_DECLTYPE_FLOAT2:
+ width = 4*2;
+ break;
+ case SVGA3D_DECLTYPE_FLOAT3:
+ width = 4*3;
+ break;
+ case SVGA3D_DECLTYPE_FLOAT4:
+ width = 4*4;
+ break;
+ case SVGA3D_DECLTYPE_D3DCOLOR:
+ width = 4;
+ break;
+ case SVGA3D_DECLTYPE_UBYTE4:
+ width = 1*4;
+ break;
+ case SVGA3D_DECLTYPE_SHORT2:
+ width = 2*2;
+ break;
+ case SVGA3D_DECLTYPE_SHORT4:
+ width = 2*4;
+ break;
+ case SVGA3D_DECLTYPE_UBYTE4N:
+ width = 1*4;
+ break;
+ case SVGA3D_DECLTYPE_SHORT2N:
+ width = 2*2;
+ break;
+ case SVGA3D_DECLTYPE_SHORT4N:
+ width = 2*4;
+ break;
+ case SVGA3D_DECLTYPE_USHORT2N:
+ width = 2*2;
+ break;
+ case SVGA3D_DECLTYPE_USHORT4N:
+ width = 2*4;
+ break;
+ case SVGA3D_DECLTYPE_UDEC3:
+ width = 4;
+ break;
+ case SVGA3D_DECLTYPE_DEC3N:
+ width = 4;
+ break;
+ case SVGA3D_DECLTYPE_FLOAT16_2:
+ width = 2*2;
+ break;
+ case SVGA3D_DECLTYPE_FLOAT16_4:
+ width = 2*4;
+ break;
+ default:
+ assert(0);
+ width = 0;
+ break;
+ }
+
+ assert(!stride || width <= stride);
+ assert(offset + (index_bias + max_index)*stride + width <= size);
+ }
+
+ assert(range->indexWidth == range->indexArray.stride);
+
+ if(ib) {
+ unsigned size = ib->size;
+ unsigned offset = range->indexArray.offset;
+ unsigned stride = range->indexArray.stride;
+ unsigned count;
+
+ assert(size);
+ assert(offset < size);
+ assert(stride);
+
+ switch (range->primType) {
+ case SVGA3D_PRIMITIVE_POINTLIST:
+ count = range->primitiveCount;
+ break;
+ case SVGA3D_PRIMITIVE_LINELIST:
+ count = range->primitiveCount * 2;
+ break;
+ case SVGA3D_PRIMITIVE_LINESTRIP:
+ count = range->primitiveCount + 1;
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLELIST:
+ count = range->primitiveCount * 3;
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+ count = range->primitiveCount + 2;
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+ count = range->primitiveCount + 2;
+ break;
+ default:
+ assert(0);
+ count = 0;
+ break;
+ }
+
+ assert(offset + count*stride <= size);
+ }
+ }
+#endif
+
+ if (hwtnl->cmd.prim_count+1 >= QSZ) {
+ ret = svga_hwtnl_flush( hwtnl );
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ /* min/max indices are relative to bias */
+ hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+ hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+
+ pipe_buffer_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+ hwtnl->cmd.prim_count++;
+
+ return ret;
+}
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
new file mode 100644
index 00000000000..14553b17b58
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -0,0 +1,83 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H
+#define SVGA_DRAW_H
+
+#include "pipe/p_compiler.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_hwtnl;
+struct svga_winsys_context;
+struct svga_screen;
+struct svga_context;
+struct pipe_buffer;
+struct u_upload_mgr;
+
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+ struct u_upload_mgr *upload_ib,
+ struct svga_winsys_context *swc );
+
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+ boolean flatshade,
+ boolean flatshade_first );
+
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
+ unsigned mode );
+
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+ unsigned i,
+ const SVGA3dVertexDecl *decl,
+ struct pipe_buffer *vb);
+
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
+ unsigned count );
+
+
+enum pipe_error
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+ unsigned prim,
+ unsigned start,
+ unsigned count);
+
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+ struct pipe_buffer *indexBuffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim,
+ unsigned start,
+ unsigned count,
+ unsigned bias );
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+
+
+#endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
new file mode 100644
index 00000000000..75492dffca2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -0,0 +1,297 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_context.h"
+
+
+#define DBG 0
+
+
+
+
+static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
+ unsigned nr,
+ unsigned index_size,
+ u_generate_func generate,
+ struct pipe_buffer **out_buf )
+{
+ struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+ unsigned size = index_size * nr;
+ struct pipe_buffer *dst = NULL;
+ void *dst_map = NULL;
+
+ dst = screen->buffer_create( screen, 32,
+ PIPE_BUFFER_USAGE_INDEX |
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ,
+ size );
+ if (dst == NULL)
+ goto fail;
+
+ dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE );
+ if (dst_map == NULL)
+ goto fail;
+
+ generate( nr,
+ dst_map );
+
+ pipe_buffer_unmap( screen, dst );
+
+ *out_buf = dst;
+ return PIPE_OK;
+
+fail:
+ if (dst_map)
+ screen->buffer_unmap( screen, dst );
+
+ if (dst)
+ screen->buffer_destroy( dst );
+
+ return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+static boolean compare( unsigned cached_nr,
+ unsigned nr,
+ unsigned type )
+{
+ if (type == U_GENERATE_REUSABLE)
+ return cached_nr >= nr;
+ else
+ return cached_nr == nr;
+}
+
+static enum pipe_error retrieve_or_generate_indices( struct svga_hwtnl *hwtnl,
+ unsigned prim,
+ unsigned gen_type,
+ unsigned gen_nr,
+ unsigned gen_size,
+ u_generate_func generate,
+ struct pipe_buffer **out_buf )
+{
+ enum pipe_error ret = PIPE_OK;
+ int i;
+
+ for (i = 0; i < IDX_CACHE_MAX; i++) {
+ if (hwtnl->index_cache[prim][i].buffer != NULL &&
+ hwtnl->index_cache[prim][i].generate == generate)
+ {
+ if (compare(hwtnl->index_cache[prim][i].gen_nr, gen_nr, gen_type))
+ {
+ pipe_buffer_reference( out_buf,
+ hwtnl->index_cache[prim][i].buffer );
+
+ if (DBG)
+ debug_printf("%s retrieve %d/%d\n", __FUNCTION__, i, gen_nr);
+
+ return PIPE_OK;
+ }
+ else if (gen_type == U_GENERATE_REUSABLE)
+ {
+ pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+ NULL );
+
+ if (DBG)
+ debug_printf("%s discard %d/%d\n", __FUNCTION__,
+ i, hwtnl->index_cache[prim][i].gen_nr);
+
+ break;
+ }
+ }
+ }
+
+ if (i == IDX_CACHE_MAX)
+ {
+ unsigned smallest = 0;
+ unsigned smallest_size = ~0;
+
+ for (i = 0; i < IDX_CACHE_MAX && smallest_size; i++) {
+ if (hwtnl->index_cache[prim][i].buffer == NULL)
+ {
+ smallest = i;
+ smallest_size = 0;
+ }
+ else if (hwtnl->index_cache[prim][i].gen_nr < smallest)
+ {
+ smallest = i;
+ smallest_size = hwtnl->index_cache[prim][i].gen_nr;
+ }
+ }
+
+ assert (smallest != IDX_CACHE_MAX);
+
+ pipe_buffer_reference( &hwtnl->index_cache[prim][smallest].buffer,
+ NULL );
+
+ if (DBG)
+ debug_printf("%s discard smallest %d/%d\n", __FUNCTION__,
+ smallest, smallest_size);
+
+ i = smallest;
+ }
+
+
+ ret = generate_indices( hwtnl,
+ gen_nr,
+ gen_size,
+ generate,
+ out_buf );
+ if (ret != PIPE_OK)
+ return ret;
+
+
+ hwtnl->index_cache[prim][i].generate = generate;
+ hwtnl->index_cache[prim][i].gen_nr = gen_nr;
+ pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+ *out_buf );
+
+ if (DBG)
+ debug_printf("%s cache %d/%d\n", __FUNCTION__,
+ i, hwtnl->index_cache[prim][i].gen_nr);
+
+ return PIPE_OK;
+}
+
+
+
+static enum pipe_error
+simple_draw_arrays( struct svga_hwtnl *hwtnl,
+ unsigned prim, unsigned start, unsigned count )
+{
+ SVGA3dPrimitiveRange range;
+ unsigned hw_prim;
+ unsigned hw_count;
+
+ hw_prim = svga_translate_prim(prim, count, &hw_count);
+ if (hw_count == 0)
+ return PIPE_ERROR_BAD_INPUT;
+
+ range.primType = hw_prim;
+ range.primitiveCount = hw_count;
+ range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+ range.indexArray.offset = 0;
+ range.indexArray.stride = 0;
+ range.indexWidth = 0;
+ range.indexBias = start;
+
+ /* Min/max index should be calculated prior to applying bias, so we
+ * end up with min_index = 0, max_index = count - 1 and everybody
+ * looking at those numbers knows to adjust them by
+ * range.indexBias.
+ */
+ return svga_hwtnl_prim( hwtnl, &range, 0, count - 1, NULL );
+}
+
+
+
+
+
+
+
+
+
+
+enum pipe_error
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+ unsigned prim,
+ unsigned start,
+ unsigned count)
+{
+ unsigned gen_prim, gen_size, gen_nr, gen_type;
+ u_generate_func gen_func;
+ enum pipe_error ret = PIPE_OK;
+
+ if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
+ prim >= PIPE_PRIM_TRIANGLES)
+ {
+ gen_type = u_unfilled_generator( prim,
+ start,
+ count,
+ hwtnl->api_fillmode,
+ &gen_prim,
+ &gen_size,
+ &gen_nr,
+ &gen_func );
+ }
+ else {
+ gen_type = u_index_generator( svga_hw_prims,
+ prim,
+ start,
+ count,
+ hwtnl->api_pv,
+ hwtnl->hw_pv,
+ &gen_prim,
+ &gen_size,
+ &gen_nr,
+ &gen_func );
+ }
+
+ if (gen_type == U_GENERATE_LINEAR) {
+ return simple_draw_arrays( hwtnl, gen_prim, start, count );
+ }
+ else {
+ struct pipe_buffer *gen_buf = NULL;
+
+ /* Need to draw as indexed primitive.
+ * Potentially need to run the gen func to build an index buffer.
+ */
+ ret = retrieve_or_generate_indices( hwtnl,
+ prim,
+ gen_type,
+ gen_nr,
+ gen_size,
+ gen_func,
+ &gen_buf );
+ if (ret)
+ goto done;
+
+ ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+ gen_buf,
+ gen_size,
+ 0,
+ count - 1,
+ gen_prim,
+ 0,
+ gen_nr,
+ start );
+ if (ret)
+ goto done;
+
+ done:
+ if (gen_buf)
+ pipe_buffer_reference( &gen_buf, NULL );
+
+ return ret;
+ }
+}
+
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
new file mode 100644
index 00000000000..167d8178315
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -0,0 +1,255 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_indices.h"
+
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+
+#include "svga_hw_reg.h"
+
+
+static enum pipe_error
+translate_indices( struct svga_hwtnl *hwtnl,
+ struct pipe_buffer *src,
+ unsigned offset,
+ unsigned nr,
+ unsigned index_size,
+ u_translate_func translate,
+ struct pipe_buffer **out_buf )
+{
+ struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+ unsigned size = index_size * nr;
+ const void *src_map = NULL;
+ struct pipe_buffer *dst = NULL;
+ void *dst_map = NULL;
+
+ dst = screen->buffer_create( screen, 32,
+ PIPE_BUFFER_USAGE_INDEX |
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ,
+ size );
+ if (dst == NULL)
+ goto fail;
+
+ src_map = pipe_buffer_map( screen, src, PIPE_BUFFER_USAGE_CPU_READ );
+ if (src_map == NULL)
+ goto fail;
+
+ dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE );
+ if (dst_map == NULL)
+ goto fail;
+
+ translate( (const char *)src_map + offset,
+ nr,
+ dst_map );
+
+ pipe_buffer_unmap( screen, src );
+ pipe_buffer_unmap( screen, dst );
+
+ *out_buf = dst;
+ return PIPE_OK;
+
+fail:
+ if (src_map)
+ screen->buffer_unmap( screen, src );
+
+ if (dst_map)
+ screen->buffer_unmap( screen, dst );
+
+ if (dst)
+ screen->buffer_destroy( dst );
+
+ return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+
+
+
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim,
+ unsigned start,
+ unsigned count,
+ unsigned bias )
+{
+ struct pipe_buffer *upload_buffer = NULL;
+ SVGA3dPrimitiveRange range;
+ unsigned hw_prim;
+ unsigned hw_count;
+ unsigned index_offset = start * index_size;
+ int ret = PIPE_OK;
+
+ hw_prim = svga_translate_prim(prim, count, &hw_count);
+ if (hw_count == 0)
+ goto done;
+
+ if (index_buffer &&
+ svga_buffer_is_user_buffer(index_buffer))
+ {
+ assert( index_buffer->size >= index_offset + count * index_size );
+
+ ret = u_upload_buffer( hwtnl->upload_ib,
+ index_offset,
+ count * index_size,
+ index_buffer,
+ &index_offset,
+ &upload_buffer );
+ if (ret)
+ goto done;
+
+ /* Don't need to worry about refcounting index_buffer as this is
+ * just a stack variable without a counted reference of its own.
+ * The caller holds the reference.
+ */
+ index_buffer = upload_buffer;
+ }
+
+ range.primType = hw_prim;
+ range.primitiveCount = hw_count;
+ range.indexArray.offset = index_offset;
+ range.indexArray.stride = index_size;
+ range.indexWidth = index_size;
+ range.indexBias = bias;
+
+ ret = svga_hwtnl_prim( hwtnl, &range, min_index, max_index, index_buffer );
+ if (ret)
+ goto done;
+
+done:
+ if (upload_buffer)
+ pipe_buffer_reference( &upload_buffer, NULL );
+
+ return ret;
+}
+
+
+
+
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim, unsigned start, unsigned count,
+ unsigned bias)
+{
+ unsigned gen_prim, gen_size, gen_nr, gen_type;
+ u_translate_func gen_func;
+ enum pipe_error ret = PIPE_OK;
+
+ if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
+ prim >= PIPE_PRIM_TRIANGLES)
+ {
+ gen_type = u_unfilled_translator( prim,
+ index_size,
+ count,
+ hwtnl->api_fillmode,
+ &gen_prim,
+ &gen_size,
+ &gen_nr,
+ &gen_func );
+ }
+ else
+ {
+ gen_type = u_index_translator( svga_hw_prims,
+ prim,
+ index_size,
+ count,
+ hwtnl->api_pv,
+ hwtnl->hw_pv,
+ &gen_prim,
+ &gen_size,
+ &gen_nr,
+ &gen_func );
+ }
+
+
+ if (gen_type == U_TRANSLATE_MEMCPY) {
+ /* No need for translation, just pass through to hardware:
+ */
+ return svga_hwtnl_simple_draw_range_elements( hwtnl, index_buffer,
+ index_size,
+ min_index,
+ max_index,
+ gen_prim, start, count, bias );
+ }
+ else {
+ struct pipe_buffer *gen_buf = NULL;
+
+ /* Need to allocate a new index buffer and run the translate
+ * func to populate it. Could potentially cache this translated
+ * index buffer with the original to avoid future
+ * re-translations. Not much point if we're just accelerating
+ * GL though, as index buffers are typically used only once
+ * there.
+ */
+ ret = translate_indices( hwtnl,
+ index_buffer,
+ start * index_size,
+ gen_nr,
+ gen_size,
+ gen_func,
+ &gen_buf );
+ if (ret)
+ goto done;
+
+ ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+ gen_buf,
+ gen_size,
+ min_index,
+ max_index,
+ gen_prim,
+ 0,
+ gen_nr,
+ bias );
+ if (ret)
+ goto done;
+
+ done:
+ if (gen_buf)
+ pipe_buffer_reference( &gen_buf, NULL );
+
+ return ret;
+ }
+}
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
new file mode 100644
index 00000000000..9aa40e16642
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -0,0 +1,158 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H_
+#define SVGA_DRAW_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "indices/u_indices.h"
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct svga_context;
+struct u_upload_mgr;
+
+/* Should include polygon?
+ */
+static const unsigned svga_hw_prims =
+ ((1 << PIPE_PRIM_POINTS) |
+ (1 << PIPE_PRIM_LINES) |
+ (1 << PIPE_PRIM_LINE_STRIP) |
+ (1 << PIPE_PRIM_TRIANGLES) |
+ (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+ (1 << PIPE_PRIM_TRIANGLE_FAN));
+
+
+static INLINE unsigned svga_translate_prim(unsigned mode,
+ unsigned count,
+ unsigned *out_count)
+{
+ switch (mode) {
+ case PIPE_PRIM_POINTS:
+ *out_count = count;
+ return SVGA3D_PRIMITIVE_POINTLIST;
+
+ case PIPE_PRIM_LINES:
+ *out_count = count / 2;
+ return SVGA3D_PRIMITIVE_LINELIST;
+
+ case PIPE_PRIM_LINE_STRIP:
+ *out_count = count - 1;
+ return SVGA3D_PRIMITIVE_LINESTRIP;
+
+ case PIPE_PRIM_TRIANGLES:
+ *out_count = count / 3;
+ return SVGA3D_PRIMITIVE_TRIANGLELIST;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *out_count = count - 2;
+ return SVGA3D_PRIMITIVE_TRIANGLESTRIP;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ *out_count = count - 2;
+ return SVGA3D_PRIMITIVE_TRIANGLEFAN;
+
+ default:
+ assert(0);
+ *out_count = 0;
+ return 0;
+ }
+}
+
+
+struct index_cache {
+ u_generate_func generate;
+ unsigned gen_nr;
+
+ /* If non-null, this buffer is filled by calling
+ * generate(nr, map(buffer))
+ */
+ struct pipe_buffer *buffer;
+};
+
+#define QSZ 32
+
+struct draw_cmd {
+ struct svga_winsys_context *swc;
+
+ SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
+ struct pipe_buffer *vdecl_vb[SVGA3D_INPUTREG_MAX];
+ unsigned vdecl_count;
+
+ SVGA3dPrimitiveRange prim[QSZ];
+ struct pipe_buffer *prim_ib[QSZ];
+ unsigned prim_count;
+ unsigned min_index[QSZ];
+ unsigned max_index[QSZ];
+};
+
+#define IDX_CACHE_MAX 8
+
+struct svga_hwtnl {
+ struct svga_context *svga;
+ struct u_upload_mgr *upload_ib;
+
+ /* Flatshade information:
+ */
+ unsigned api_pv;
+ unsigned hw_pv;
+ unsigned api_fillmode;
+
+ /* Cache the results of running a particular generate func on each
+ * primitive type.
+ */
+ struct index_cache index_cache[PIPE_PRIM_MAX][IDX_CACHE_MAX];
+
+ /* Try to build the maximal draw command packet before emitting:
+ */
+ struct draw_cmd cmd;
+};
+
+
+
+/***********************************************************************
+ * Internal functions
+ */
+enum pipe_error
+svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+ const SVGA3dPrimitiveRange *range,
+ unsigned min_index,
+ unsigned max_index,
+ struct pipe_buffer *ib );
+
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+ struct pipe_buffer *indexBuffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim,
+ unsigned start,
+ unsigned count,
+ unsigned bias );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_hw_reg.h b/src/gallium/drivers/svga/svga_hw_reg.h
new file mode 100644
index 00000000000..183f4b918e0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_hw_reg.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_HW_REG_H
+#define SVGA_HW_REG_H
+
+#include "pipe/p_compiler.h"
+
+#if defined(PIPE_CC_GCC)
+#ifndef HAVE_STDINT_H
+#define HAVE_STDINT_H
+#endif
+#endif
+
+#include "svga_types.h"
+
+#include "svga3d_reg.h"
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
new file mode 100644
index 00000000000..855d228755f
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -0,0 +1,246 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+
+static INLINE unsigned
+svga_translate_blend_factor(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO: return SVGA3D_BLENDOP_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA: return SVGA3D_BLENDOP_SRCALPHA;
+ case PIPE_BLENDFACTOR_ONE: return SVGA3D_BLENDOP_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR: return SVGA3D_BLENDOP_SRCCOLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: return SVGA3D_BLENDOP_INVSRCCOLOR;
+ case PIPE_BLENDFACTOR_DST_COLOR: return SVGA3D_BLENDOP_DESTCOLOR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR: return SVGA3D_BLENDOP_INVDESTCOLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return SVGA3D_BLENDOP_INVSRCALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA: return SVGA3D_BLENDOP_DESTALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA: return SVGA3D_BLENDOP_INVDESTALPHA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
+ case PIPE_BLENDFACTOR_CONST_COLOR: return SVGA3D_BLENDOP_BLENDFACTOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA: return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+ default:
+ assert(0);
+ return SVGA3D_BLENDOP_ZERO;
+ }
+}
+
+static INLINE unsigned
+svga_translate_blend_func(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_BLEND_ADD: return SVGA3D_BLENDEQ_ADD;
+ case PIPE_BLEND_SUBTRACT: return SVGA3D_BLENDEQ_SUBTRACT;
+ case PIPE_BLEND_REVERSE_SUBTRACT: return SVGA3D_BLENDEQ_REVSUBTRACT;
+ case PIPE_BLEND_MIN: return SVGA3D_BLENDEQ_MINIMUM;
+ case PIPE_BLEND_MAX: return SVGA3D_BLENDEQ_MAXIMUM;
+ default:
+ assert(0);
+ return SVGA3D_BLENDEQ_ADD;
+ }
+}
+
+
+static void *
+svga_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *templ)
+{
+ struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
+ unsigned i;
+
+
+ /* Fill in the per-rendertarget blend state. We currently only
+ * have one rendertarget.
+ */
+ for (i = 0; i < 1; i++) {
+ /* No way to set this in SVGA3D, and no way to correctly implement it on
+ * top of D3D9 API. Instead we try to simulate with various blend modes.
+ */
+ if (templ->logicop_enable) {
+ switch (templ->logicop_func) {
+ case PIPE_LOGICOP_XOR:
+ blend->need_white_fragments = TRUE;
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT;
+ break;
+ case PIPE_LOGICOP_CLEAR:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_COPY:
+ blend->rt[i].blend_enable = FALSE;
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_NOOP:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_SET:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_INVERT:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_AND:
+ /* Approximate with minimum - works for the 0 & anything case: */
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_OR:
+ /* Approximate with maximum - works for the 1 | anything case: */
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ blend->rt[i].blend_enable = TRUE;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_NAND:
+ case PIPE_LOGICOP_NOR:
+ case PIPE_LOGICOP_EQUIV:
+ /* Fill these in with plausible values */
+ blend->rt[i].blend_enable = FALSE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ else {
+ blend->rt[i].blend_enable = templ->blend_enable;
+
+ if (templ->blend_enable) {
+ blend->rt[i].srcblend = svga_translate_blend_factor(templ->rgb_src_factor);
+ blend->rt[i].dstblend = svga_translate_blend_factor(templ->rgb_dst_factor);
+ blend->rt[i].blendeq = svga_translate_blend_func(templ->rgb_func);
+ blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->alpha_src_factor);
+ blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor);
+ blend->rt[i].blendeq_alpha = svga_translate_blend_func(templ->alpha_func);
+
+ if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
+ blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
+ blend->rt[i].blendeq_alpha != blend->rt[i].blendeq)
+ {
+ blend->rt[i].separate_alpha_blend_enable = TRUE;
+ }
+ }
+ }
+
+ blend->rt[i].writemask = templ->colormask;
+ }
+
+ return blend;
+}
+
+static void svga_bind_blend_state(struct pipe_context *pipe,
+ void *blend)
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.blend = (struct svga_blend_state*)blend;
+ svga->dirty |= SVGA_NEW_BLEND;
+}
+
+
+static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE(blend);
+}
+
+static void svga_set_blend_color( struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color )
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.blend_color = *blend_color;
+
+ svga->dirty |= SVGA_NEW_BLEND;
+}
+
+
+void svga_init_blend_functions( struct svga_context *svga )
+{
+ svga->pipe.create_blend_state = svga_create_blend_state;
+ svga->pipe.bind_blend_state = svga_bind_blend_state;
+ svga->pipe.delete_blend_state = svga_delete_blend_state;
+
+ svga->pipe.set_blend_color = svga_set_blend_color;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
new file mode 100644
index 00000000000..4f575b06e62
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -0,0 +1,92 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_debug.h"
+#include "svga_cmd.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+
+static void svga_surface_copy(struct pipe_context *pipe,
+ struct pipe_surface *dest,
+ unsigned destx, unsigned desty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy,
+ unsigned width, unsigned height)
+{
+ struct svga_context *svga = svga_context(pipe);
+ SVGA3dCopyBox *box;
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry( svga );
+
+ SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n",
+ svga_surface(dest)->handle,
+ destx, desty,
+ svga_surface(src)->handle,
+ srcx, srcy,
+ width, height);
+
+ ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+ src,
+ dest,
+ &box,
+ 1);
+ if(ret != PIPE_OK) {
+
+ svga_context_flush(svga, NULL);
+
+ ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+ src,
+ dest,
+ &box,
+ 1);
+ assert(ret == PIPE_OK);
+ }
+
+ box->x = destx;
+ box->y = desty;
+ box->z = 0;
+ box->w = width;
+ box->h = height;
+ box->d = 1;
+ box->srcx = srcx;
+ box->srcy = srcy;
+ box->srcz = 0;
+
+ SVGA_FIFOCommitAll(svga->swc);
+
+ svga_surface(dest)->dirty = TRUE;
+ svga_propagate_surface(pipe, dest);
+}
+
+
+void
+svga_init_blit_functions(struct svga_context *svga)
+{
+ svga->pipe.surface_copy = svga_surface_copy;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
new file mode 100644
index 00000000000..409b3b41cbc
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -0,0 +1,125 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_pack_color.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_screen_texture.h"
+
+
+static enum pipe_error
+try_clear(struct svga_context *svga,
+ unsigned buffers,
+ const float *rgba,
+ double depth,
+ unsigned stencil)
+{
+ int ret = PIPE_OK;
+ SVGA3dRect rect = { 0, 0, 0, 0 };
+ boolean restore_viewport = FALSE;
+ SVGA3dClearFlag flags = 0;
+ struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+ union util_color uc;
+
+ ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR);
+ if (ret)
+ return ret;
+
+ if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) {
+ flags |= SVGA3D_CLEAR_COLOR;
+ util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
+
+ rect.w = fb->cbufs[0]->width;
+ rect.h = fb->cbufs[0]->height;
+ }
+
+ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fb->zsbuf) {
+ flags |= SVGA3D_CLEAR_DEPTH;
+
+ if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_Z24S8_UNORM)
+ flags |= SVGA3D_CLEAR_STENCIL;
+
+ rect.w = MAX2(rect.w, fb->zsbuf->width);
+ rect.h = MAX2(rect.h, fb->zsbuf->height);
+ }
+
+ if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+ restore_viewport = TRUE;
+ ret = SVGA3D_SetViewport(svga->swc, &rect);
+ if (ret)
+ return ret;
+ }
+
+ ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui, depth, stencil,
+ rect.x, rect.y, rect.w, rect.h);
+ if (ret != PIPE_OK)
+ return ret;
+
+ if (restore_viewport) {
+ memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
+ ret = SVGA3D_SetViewport(svga->swc, &rect);
+ }
+
+ return ret;
+}
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ */
+void
+svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil)
+{
+ struct svga_context *svga = svga_context( pipe );
+ int ret;
+
+ if (buffers & PIPE_CLEAR_COLOR)
+ SVGA_DBG(DEBUG_DMA, "clear sid %p\n",
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle);
+
+ ret = try_clear( svga, buffers, rgba, depth, stencil );
+
+ if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+ /* Flush command buffer and retry:
+ */
+ svga_context_flush( svga, NULL );
+
+ ret = try_clear( svga, buffers, rgba, depth, stencil );
+ }
+
+ /*
+ * Mark target surfaces as dirty
+ * TODO Mark only cleared surfaces.
+ */
+ svga_mark_surfaces_dirty(svga);
+
+ assert (ret == PIPE_OK);
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c
new file mode 100644
index 00000000000..10e7a121892
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+
+/***********************************************************************
+ * Constant buffers
+ */
+
+struct svga_constbuf
+{
+ unsigned type;
+ float (*data)[4];
+ unsigned count;
+};
+
+
+
+static void svga_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ const struct pipe_constant_buffer *buf)
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index == 0);
+
+ pipe_buffer_reference( &svga->curr.cb[shader],
+ buf->buffer );
+
+ if (shader == PIPE_SHADER_FRAGMENT)
+ svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
+ else
+ svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+}
+
+
+
+void svga_init_constbuffer_functions( struct svga_context *svga )
+{
+ svga->pipe.set_constant_buffer = svga_set_constant_buffer;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
new file mode 100644
index 00000000000..df636c08a05
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -0,0 +1,153 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+
+
+static INLINE unsigned
+svga_translate_compare_func(unsigned func)
+{
+ switch (func) {
+ case PIPE_FUNC_NEVER: return SVGA3D_CMP_NEVER;
+ case PIPE_FUNC_LESS: return SVGA3D_CMP_LESS;
+ case PIPE_FUNC_LEQUAL: return SVGA3D_CMP_LESSEQUAL;
+ case PIPE_FUNC_GREATER: return SVGA3D_CMP_GREATER;
+ case PIPE_FUNC_GEQUAL: return SVGA3D_CMP_GREATEREQUAL;
+ case PIPE_FUNC_NOTEQUAL: return SVGA3D_CMP_NOTEQUAL;
+ case PIPE_FUNC_EQUAL: return SVGA3D_CMP_EQUAL;
+ case PIPE_FUNC_ALWAYS: return SVGA3D_CMP_ALWAYS;
+ default:
+ assert(0);
+ return SVGA3D_CMP_ALWAYS;
+ }
+}
+
+static INLINE unsigned
+svga_translate_stencil_op(unsigned op)
+{
+ switch (op) {
+ case PIPE_STENCIL_OP_KEEP: return SVGA3D_STENCILOP_KEEP;
+ case PIPE_STENCIL_OP_ZERO: return SVGA3D_STENCILOP_ZERO;
+ case PIPE_STENCIL_OP_REPLACE: return SVGA3D_STENCILOP_REPLACE;
+ case PIPE_STENCIL_OP_INCR: return SVGA3D_STENCILOP_INCR;
+ case PIPE_STENCIL_OP_DECR: return SVGA3D_STENCILOP_DECR;
+ case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCRSAT; /* incorrect? */
+ case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECRSAT; /* incorrect? */
+ case PIPE_STENCIL_OP_INVERT: return SVGA3D_STENCILOP_INVERT;
+ default:
+ assert(0);
+ return SVGA3D_STENCILOP_KEEP;
+ }
+}
+
+
+static void *
+svga_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *templ)
+{
+ struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
+
+ /* Don't try to figure out CW/CCW correspondence with
+ * stencil[0]/[1] at this point. Presumably this can change as
+ * back/front face are modified.
+ */
+ ds->stencil[0].enabled = templ->stencil[0].enabled;
+ if (ds->stencil[0].enabled) {
+ ds->stencil[0].func = svga_translate_compare_func(templ->stencil[0].func);
+ ds->stencil[0].fail = svga_translate_stencil_op(templ->stencil[0].fail_op);
+ ds->stencil[0].zfail = svga_translate_stencil_op(templ->stencil[0].zfail_op);
+ ds->stencil[0].pass = svga_translate_stencil_op(templ->stencil[0].zpass_op);
+
+ /* SVGA3D has one ref/mask/writemask triple shared between front &
+ * back face stencil. We really need two:
+ */
+ ds->stencil_ref = templ->stencil[0].ref_value & 0xff;
+ ds->stencil_mask = templ->stencil[0].valuemask & 0xff;
+ ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
+ }
+
+
+ ds->stencil[1].enabled = templ->stencil[1].enabled;
+ if (templ->stencil[1].enabled) {
+ ds->stencil[1].func = svga_translate_compare_func(templ->stencil[1].func);
+ ds->stencil[1].fail = svga_translate_stencil_op(templ->stencil[1].fail_op);
+ ds->stencil[1].zfail = svga_translate_stencil_op(templ->stencil[1].zfail_op);
+ ds->stencil[1].pass = svga_translate_stencil_op(templ->stencil[1].zpass_op);
+
+ ds->stencil_ref = templ->stencil[1].ref_value & 0xff;
+ ds->stencil_mask = templ->stencil[1].valuemask & 0xff;
+ ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
+ }
+
+
+ ds->zenable = templ->depth.enabled;
+ if (ds->zenable) {
+ ds->zfunc = svga_translate_compare_func(templ->depth.func);
+ ds->zwriteenable = templ->depth.writemask;
+ }
+
+ ds->alphatestenable = templ->alpha.enabled;
+ if (ds->alphatestenable) {
+ ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
+ ds->alpharef = templ->alpha.ref_value;
+ }
+
+ return ds;
+}
+
+static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
+ svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+}
+
+static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ FREE(depth_stencil);
+}
+
+
+
+void svga_init_depth_stencil_functions( struct svga_context *svga )
+{
+ svga->pipe.create_depth_stencil_alpha_state = svga_create_depth_stencil_state;
+ svga->pipe.bind_depth_stencil_alpha_state = svga_bind_depth_stencil_state;
+ svga->pipe.delete_depth_stencil_alpha_state = svga_delete_depth_stencil_state;
+}
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
new file mode 100644
index 00000000000..0f24ef4ee8d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -0,0 +1,259 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_time.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+#include "svga_debug.h"
+
+
+
+static enum pipe_error
+retry_draw_range_elements( struct svga_context *svga,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim,
+ unsigned start,
+ unsigned count,
+ boolean do_retry )
+{
+ enum pipe_error ret = 0;
+
+ svga_hwtnl_set_unfilled( svga->hwtnl,
+ svga->curr.rast->hw_unfilled );
+
+ svga_hwtnl_set_flatshade( svga->hwtnl,
+ svga->curr.rast->templ.flatshade,
+ svga->curr.rast->templ.flatshade_first );
+
+
+ ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+ if (ret)
+ goto retry;
+
+ ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
+ index_buffer, index_size,
+ min_index, max_index,
+ prim, start, count, 0 );
+ if (ret)
+ goto retry;
+
+ if (svga->curr.any_user_vertex_buffers) {
+ ret = svga_hwtnl_flush( svga->hwtnl );
+ if (ret)
+ goto retry;
+ }
+
+ return PIPE_OK;
+
+retry:
+ svga_context_flush( svga, NULL );
+
+ if (do_retry)
+ {
+ return retry_draw_range_elements( svga,
+ index_buffer, index_size,
+ min_index, max_index,
+ prim, start, count,
+ FALSE );
+ }
+
+ return ret;
+}
+
+
+static enum pipe_error
+retry_draw_arrays( struct svga_context *svga,
+ unsigned prim,
+ unsigned start,
+ unsigned count,
+ boolean do_retry )
+{
+ enum pipe_error ret;
+
+ svga_hwtnl_set_unfilled( svga->hwtnl,
+ svga->curr.rast->hw_unfilled );
+
+ svga_hwtnl_set_flatshade( svga->hwtnl,
+ svga->curr.rast->templ.flatshade,
+ svga->curr.rast->templ.flatshade_first );
+
+ ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+ if (ret)
+ goto retry;
+
+ ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
+ start, count );
+ if (ret)
+ goto retry;
+
+ if (svga->curr.any_user_vertex_buffers) {
+ ret = svga_hwtnl_flush( svga->hwtnl );
+ if (ret)
+ goto retry;
+ }
+
+ return 0;
+
+retry:
+ if (ret == PIPE_ERROR_OUT_OF_MEMORY && do_retry)
+ {
+ svga_context_flush( svga, NULL );
+
+ return retry_draw_arrays( svga,
+ prim,
+ start,
+ count,
+ FALSE );
+ }
+
+ return ret;
+}
+
+
+
+
+
+static void
+svga_draw_range_elements( struct pipe_context *pipe,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct svga_context *svga = svga_context( pipe );
+ unsigned reduced_prim = u_reduced_prim(prim);
+ enum pipe_error ret = 0;
+
+ if (!u_trim_pipe_prim( prim, &count ))
+ return;
+
+ /*
+ * Mark currently bound target surfaces as dirty
+ * doesn't really matter if it is done before drawing.
+ *
+ * TODO If we ever normaly return something other then
+ * true we should not mark it as dirty then.
+ */
+ svga_mark_surfaces_dirty(svga_context(pipe));
+
+ if (svga->curr.reduced_prim != reduced_prim) {
+ svga->curr.reduced_prim = reduced_prim;
+ svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
+ }
+
+ svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
+
+#ifdef DEBUG
+ if (svga->curr.vs->base.id == svga->debug.disable_shader ||
+ svga->curr.fs->base.id == svga->debug.disable_shader)
+ return;
+#endif
+
+ if (svga->state.sw.need_swtnl)
+ {
+ ret = svga_swtnl_draw_range_elements( svga,
+ index_buffer,
+ index_size,
+ min_index, max_index,
+ prim,
+ start, count );
+ }
+ else {
+ if (index_buffer) {
+ ret = retry_draw_range_elements( svga,
+ index_buffer,
+ index_size,
+ min_index,
+ max_index,
+ prim,
+ start,
+ count,
+ TRUE );
+ }
+ else {
+ ret = retry_draw_arrays( svga,
+ prim,
+ start,
+ count,
+ TRUE );
+ }
+ }
+
+ if (SVGA_DEBUG & DEBUG_FLUSH) {
+ static unsigned id;
+ debug_printf("%s %d\n", __FUNCTION__, id++);
+ if (id > 1300)
+ util_time_sleep( 2000 );
+
+ svga_hwtnl_flush_retry( svga );
+ svga_context_flush(svga, NULL);
+ }
+}
+
+
+static void
+svga_draw_elements( struct pipe_context *pipe,
+ struct pipe_buffer *index_buffer,
+ unsigned index_size,
+ unsigned prim, unsigned start, unsigned count)
+{
+ svga_draw_range_elements( pipe, index_buffer,
+ index_size,
+ 0, 0xffffffff,
+ prim, start, count );
+}
+
+static void
+svga_draw_arrays( struct pipe_context *pipe,
+ unsigned prim, unsigned start, unsigned count)
+{
+ svga_draw_range_elements(pipe, NULL, 0,
+ start, start + count - 1,
+ prim,
+ start, count);
+}
+
+
+void svga_init_draw_functions( struct svga_context *svga )
+{
+ svga->pipe.draw_arrays = svga_draw_arrays;
+ svga->pipe.draw_elements = svga_draw_elements;
+ svga->pipe.draw_range_elements = svga_draw_range_elements;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
new file mode 100644
index 00000000000..0becb0765ac
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -0,0 +1,71 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_defines.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ struct svga_context *svga = svga_context(pipe);
+ int i;
+
+ /* Emit buffered drawing commands.
+ */
+ svga_hwtnl_flush_retry( svga );
+
+ /* Emit back-copy from render target view to texture.
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (svga->curr.framebuffer.cbufs[i])
+ svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]);
+ }
+ if (svga->curr.framebuffer.zsbuf)
+ svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf);
+
+ /* Flush command queue.
+ */
+ svga_context_flush(svga, fence);
+
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n",
+ __FUNCTION__, flags, fence ? *fence : 0x0);
+}
+
+
+void svga_init_flush_functions( struct svga_context *svga )
+{
+ svga->pipe.flush = svga_flush;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
new file mode 100644
index 00000000000..5f1213e46a3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -0,0 +1,134 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/***********************************************************************
+ * Fragment shaders
+ */
+
+static void *
+svga_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_fragment_shader *fs;
+
+ fs = CALLOC_STRUCT(svga_fragment_shader);
+ if (!fs)
+ return NULL;
+
+ fs->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+ /* Collect basic info that we'll need later:
+ */
+ tgsi_scan_shader(fs->base.tokens, &fs->base.info);
+
+ fs->base.id = svga->debug.shader_id++;
+ fs->base.use_sm30 = svgascreen->use_ps30;
+
+ if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+ debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+ __FUNCTION__, fs->base.id,
+ fs->base.info.num_inputs, fs->base.info.num_outputs);
+ }
+
+ return fs;
+}
+
+static void
+svga_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.fs = fs;
+ svga->dirty |= SVGA_NEW_FS;
+}
+
+static
+void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+ struct svga_shader_result *result, *tmp;
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry( svga );
+
+ for (result = fs->base.results; result; result = tmp ) {
+ tmp = result->next;
+
+ ret = SVGA3D_DestroyShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_PS );
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_DestroyShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_PS );
+ assert(ret == PIPE_OK);
+ }
+
+ util_bitmask_clear( svga->fs_bm, result->id );
+
+ svga_destroy_shader_result( result );
+
+ /*
+ * Remove stale references to this result to ensure a new result on the
+ * same address will be detected as a change.
+ */
+ if(result == svga->state.hw_draw.fs)
+ svga->state.hw_draw.fs = NULL;
+ }
+
+ FREE((void *)fs->base.tokens);
+ FREE(fs);
+}
+
+
+void svga_init_fs_functions( struct svga_context *svga )
+{
+ svga->pipe.create_fs_state = svga_create_fs_state;
+ svga->pipe.bind_fs_state = svga_bind_fs_state;
+ svga->pipe.delete_fs_state = svga_delete_fs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
new file mode 100644
index 00000000000..58cb1e6e230
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -0,0 +1,187 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_set_scissor_state( struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor )
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ memcpy( &svga->curr.scissor, scissor, sizeof(*scissor) );
+ svga->dirty |= SVGA_NEW_SCISSOR;
+}
+
+
+static void svga_set_polygon_stipple( struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple )
+{
+ /* overridden by the draw module */
+}
+
+
+void svga_cleanup_framebuffer(struct svga_context *svga)
+{
+ struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ int i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&curr->cbufs[i], NULL);
+ pipe_surface_reference(&hw->cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&curr->zsbuf, NULL);
+ pipe_surface_reference(&hw->zsbuf, NULL);
+}
+
+
+#define DEPTH_BIAS_SCALE_FACTOR_D16 ((float)(1<<15))
+#define DEPTH_BIAS_SCALE_FACTOR_D24S8 ((float)(1<<23))
+#define DEPTH_BIAS_SCALE_FACTOR_D32 ((float)(1<<31))
+
+
+static void svga_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct pipe_framebuffer_state *dst = &svga->curr.framebuffer;
+ boolean propagate = FALSE;
+ int i;
+
+ dst->width = fb->width;
+ dst->height = fb->height;
+ dst->nr_cbufs = fb->nr_cbufs;
+
+ /* check if we need to propaget any of the target surfaces */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+ if (svga_surface_needs_propagation(dst->cbufs[i]))
+ propagate = TRUE;
+ }
+
+ if (propagate) {
+ /* make sure that drawing calls comes before propagation calls */
+ svga_hwtnl_flush_retry( svga );
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+ svga_propagate_surface(pipe, dst->cbufs[i]);
+ }
+
+ /* XXX: Actually the virtual hardware may support rendertargets with
+ * different size, depending on the host API and driver, but since we cannot
+ * know that make no such assumption here. */
+ for(i = 0; i < fb->nr_cbufs; ++i) {
+ if (fb->zsbuf && fb->cbufs[i]) {
+ assert(fb->zsbuf->width == fb->cbufs[i]->width);
+ assert(fb->zsbuf->height == fb->cbufs[i]->height);
+ }
+ }
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ pipe_surface_reference(&dst->cbufs[i], fb->cbufs[i]);
+ pipe_surface_reference(&dst->zsbuf, fb->zsbuf);
+
+
+ if (svga->curr.framebuffer.zsbuf)
+ {
+ switch (svga->curr.framebuffer.zsbuf->format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16;
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8;
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D32;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ svga->curr.depthscale = 1.0f / ((float)(1<<23));
+ break;
+ default:
+ svga->curr.depthscale = 0.0f;
+ break;
+ }
+ }
+ else {
+ svga->curr.depthscale = 0.0f;
+ }
+
+ svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+}
+
+
+
+static void svga_set_clip_state( struct pipe_context *pipe,
+ const struct pipe_clip_state *clip )
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.clip = *clip; /* struct copy */
+
+ svga->dirty |= SVGA_NEW_CLIP;
+}
+
+
+
+/* Called when driver state tracker notices changes to the viewport
+ * matrix:
+ */
+static void svga_set_viewport_state( struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport )
+{
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.viewport = *viewport; /* struct copy */
+
+ svga->dirty |= SVGA_NEW_VIEWPORT;
+}
+
+
+
+void svga_init_misc_functions( struct svga_context *svga )
+{
+ svga->pipe.set_scissor_state = svga_set_scissor_state;
+ svga->pipe.set_polygon_stipple = svga_set_polygon_stipple;
+ svga->pipe.set_framebuffer_state = svga_set_framebuffer_state;
+ svga->pipe.set_clip_state = svga_set_clip_state;
+ svga->pipe.set_viewport_state = svga_set_viewport_state;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
new file mode 100644
index 00000000000..01336b0a2c3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -0,0 +1,267 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/* Fixme: want a public base class for all pipe structs, even if there
+ * isn't much in them.
+ */
+struct pipe_query {
+ int dummy;
+};
+
+struct svga_query {
+ struct pipe_query base;
+ SVGA3dQueryType type;
+ struct svga_winsys_buffer *hwbuf;
+ volatile SVGA3dQueryResult *queryResult;
+ struct pipe_fence_handle *fence;
+};
+
+/***********************************************************************
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct svga_query *
+svga_query( struct pipe_query *q )
+{
+ return (struct svga_query *)q;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result);
+
+static struct pipe_query *svga_create_query( struct pipe_context *pipe,
+ unsigned query_type )
+{
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_query *sq;
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+ sq = CALLOC_STRUCT(svga_query);
+ if (!sq)
+ goto no_sq;
+
+ sq->type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+ sq->hwbuf = svga_winsys_buffer_create(svgascreen,
+ 1,
+ SVGA_BUFFER_USAGE_PINNED,
+ sizeof *sq->queryResult);
+ if(!sq->hwbuf)
+ goto no_hwbuf;
+
+ sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws,
+ sq->hwbuf,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(!sq->queryResult)
+ goto no_query_result;
+
+ sq->queryResult->totalSize = sizeof *sq->queryResult;
+ sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+ /*
+ * We request the buffer to be pinned and assume it is always mapped.
+ *
+ * The reason is that we don't want to wait for fences when checking the
+ * query status.
+ */
+ sws->buffer_unmap(sws, sq->hwbuf);
+
+ return &sq->base;
+
+no_query_result:
+ sws->buffer_destroy(sws, sq->hwbuf);
+no_hwbuf:
+ FREE(sq);
+no_sq:
+ return NULL;
+}
+
+static void svga_destroy_query(struct pipe_context *pipe,
+ struct pipe_query *q)
+{
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_query *sq = svga_query( q );
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ sws->buffer_destroy(sws, sq->hwbuf);
+ sws->fence_reference(sws, &sq->fence, NULL);
+ FREE(sq);
+}
+
+static void svga_begin_query(struct pipe_context *pipe,
+ struct pipe_query *q)
+{
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_context *svga = svga_context( pipe );
+ struct svga_query *sq = svga_query( q );
+ enum pipe_error ret;
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+ assert(!svga->sq);
+
+ /* Need to flush out buffered drawing commands so that they don't
+ * get counted in the query results.
+ */
+ svga_hwtnl_flush_retry(svga);
+
+ if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+ /* The application doesn't care for the pending query result. We cannot
+ * let go the existing buffer and just get a new one because its storage
+ * may be reused for other purposes and clobbered by the host when it
+ * determines the query result. So the only option here is to wait for
+ * the existing query's result -- not a big deal, given that no sane
+ * application would do this.
+ */
+ uint64_t result;
+
+ svga_get_query_result(pipe, q, TRUE, &result);
+
+ assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+ }
+
+ sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+ sws->fence_reference(sws, &sq->fence, NULL);
+
+ ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+ assert(ret == PIPE_OK);
+ }
+
+ svga->sq = sq;
+}
+
+static void svga_end_query(struct pipe_context *pipe,
+ struct pipe_query *q)
+{
+ struct svga_context *svga = svga_context( pipe );
+ struct svga_query *sq = svga_query( q );
+ enum pipe_error ret;
+
+ SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+ assert(svga->sq == sq);
+
+ svga_hwtnl_flush_retry(svga);
+
+ /* Set to PENDING before sending EndQuery. */
+ sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+ ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+ assert(ret == PIPE_OK);
+ }
+
+ /* TODO: Delay flushing. We don't really need to flush here, just ensure
+ * that there is one flush before svga_get_query_result attempts to get the
+ * result */
+ svga_context_flush(svga, NULL);
+
+ svga->sq = NULL;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ uint64_t *result)
+{
+ struct svga_context *svga = svga_context( pipe );
+ struct svga_screen *svgascreen = svga_screen( pipe->screen );
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_query *sq = svga_query( q );
+ SVGA3dQueryState state;
+
+ SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__);
+
+ /* The query status won't be updated by the host unless
+ * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a
+ * synchronous wait on the host */
+ if(!sq->fence) {
+ enum pipe_error ret;
+
+ ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+ assert(ret == PIPE_OK);
+ }
+
+ svga_context_flush(svga, &sq->fence);
+
+ assert(sq->fence);
+ }
+
+ state = sq->queryResult->state;
+ if(state == SVGA3D_QUERYSTATE_PENDING) {
+ if(!wait)
+ return FALSE;
+
+ sws->fence_finish(sws, sq->fence, 0);
+
+ state = sq->queryResult->state;
+ }
+
+ assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
+ state == SVGA3D_QUERYSTATE_FAILED);
+
+ *result = (uint64_t)sq->queryResult->result32;
+
+ SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
+
+ return TRUE;
+}
+
+
+
+void svga_init_query_functions( struct svga_context *svga )
+{
+ svga->pipe.create_query = svga_create_query;
+ svga->pipe.destroy_query = svga_destroy_query;
+ svga->pipe.begin_query = svga_begin_query;
+ svga->pipe.end_query = svga_end_query;
+ svga->pipe.get_query_result = svga_get_query_result;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
new file mode 100644
index 00000000000..b03f8eb9cf3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -0,0 +1,250 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
+ */
+static SVGA3dFace svga_translate_cullmode( unsigned mode,
+ unsigned front_winding )
+{
+ switch (mode) {
+ case PIPE_WINDING_NONE:
+ return SVGA3D_FACE_NONE;
+ case PIPE_WINDING_CCW:
+ return SVGA3D_FACE_BACK;
+ case PIPE_WINDING_CW:
+ return SVGA3D_FACE_FRONT;
+ case PIPE_WINDING_BOTH:
+ return SVGA3D_FACE_FRONT_BACK;
+ default:
+ assert(0);
+ return SVGA3D_FACE_NONE;
+ }
+}
+
+static SVGA3dShadeMode svga_translate_flatshade( unsigned mode )
+{
+ return mode ? SVGA3D_SHADEMODE_FLAT : SVGA3D_SHADEMODE_SMOOTH;
+}
+
+
+static void *
+svga_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *templ)
+{
+ struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
+ /* need this for draw module. */
+ rast->templ = *templ;
+
+ /* light_twoside - XXX: need fragment shader varient */
+ /* poly_smooth - XXX: no fallback available */
+ /* poly_stipple_enable - draw module */
+ /* point_sprite - ? */
+ /* point_size_per_vertex - ? */
+ /* sprite_coord_mode - ??? */
+ /* bypass_vs_viewport_and_clip - handled by viewport setup */
+ /* flatshade_first - handled by index translation */
+ /* gl_rasterization_rules - XXX - viewport code */
+ /* line_width - draw module */
+ /* fill_cw, fill_ccw - draw module or index translation */
+
+ rast->shademode = svga_translate_flatshade( templ->flatshade );
+ rast->cullmode = svga_translate_cullmode( templ->cull_mode,
+ templ->front_winding );
+ rast->scissortestenable = templ->scissor;
+ rast->multisampleantialias = templ->multisample;
+ rast->antialiasedlineenable = templ->line_smooth;
+ rast->lastpixel = templ->line_last_pixel;
+ rast->pointsize = templ->point_size;
+ rast->pointsize_min = templ->point_size_min;
+ rast->pointsize_max = templ->point_size_max;
+ rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+ /* Use swtnl + decomposition implement these:
+ */
+ if (templ->poly_stipple_enable)
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+
+ if (templ->line_width != 1.0 &&
+ templ->line_width != 0.0)
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+
+ if (templ->line_stipple_enable) {
+ /* LinePattern not implemented on all backends.
+ */
+ if (0) {
+ SVGA3dLinePattern lp;
+ lp.repeat = templ->line_stipple_factor + 1;
+ lp.pattern = templ->line_stipple_pattern;
+ rast->linepattern = lp.uintValue;
+ }
+ else {
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+ }
+ }
+
+ if (templ->point_smooth)
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
+
+ {
+ boolean offset_cw = templ->offset_cw;
+ boolean offset_ccw = templ->offset_ccw;
+ boolean offset = 0;
+ int fill_cw = templ->fill_cw;
+ int fill_ccw = templ->fill_ccw;
+ int fill = PIPE_POLYGON_MODE_FILL;
+
+ switch (templ->cull_mode) {
+ case PIPE_WINDING_BOTH:
+ offset = 0;
+ fill = PIPE_POLYGON_MODE_FILL;
+ break;
+
+ case PIPE_WINDING_CW:
+ offset = offset_ccw;
+ fill = fill_ccw;
+ break;
+
+ case PIPE_WINDING_CCW:
+ offset = offset_cw;
+ fill = fill_cw;
+ break;
+
+ case PIPE_WINDING_NONE:
+ if (fill_cw != fill_ccw || offset_cw != offset_ccw)
+ {
+ /* Always need the draw module to work out different
+ * front/back fill modes:
+ */
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ }
+ else {
+ offset = offset_ccw;
+ fill = fill_ccw;
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ /* Unfilled primitive modes aren't implemented on all virtual
+ * hardware. We can do some unfilled processing with index
+ * translation, but otherwise need the draw module:
+ */
+ if (fill != PIPE_POLYGON_MODE_FILL &&
+ (templ->flatshade ||
+ templ->light_twoside ||
+ offset ||
+ templ->cull_mode != PIPE_WINDING_NONE))
+ {
+ fill = PIPE_POLYGON_MODE_FILL;
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ }
+
+ /* If we are decomposing to lines, and lines need the pipeline,
+ * then we also need the pipeline for tris.
+ */
+ if (fill == PIPE_POLYGON_MODE_LINE &&
+ (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES))
+ {
+ fill = PIPE_POLYGON_MODE_FILL;
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ }
+
+ /* Similarly for points:
+ */
+ if (fill == PIPE_POLYGON_MODE_POINT &&
+ (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS))
+ {
+ fill = PIPE_POLYGON_MODE_FILL;
+ rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+ }
+
+ if (offset) {
+ rast->slopescaledepthbias = templ->offset_scale;
+ rast->depthbias = templ->offset_units;
+ }
+
+ rast->hw_unfilled = fill;
+ }
+
+
+
+
+ if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
+ /* Turn off stuff which will get done in the draw module:
+ */
+ rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+ rast->slopescaledepthbias = 0;
+ rast->depthbias = 0;
+ }
+
+ return rast;
+}
+
+static void svga_bind_rasterizer_state( struct pipe_context *pipe,
+ void *state )
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
+
+ svga->curr.rast = raster;
+
+ draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL);
+
+ svga->dirty |= SVGA_NEW_RAST;
+}
+
+static void svga_delete_rasterizer_state(struct pipe_context *pipe,
+ void *raster)
+{
+ FREE(raster);
+}
+
+
+void svga_init_rasterizer_functions( struct svga_context *svga )
+{
+ svga->pipe.create_rasterizer_state = svga_create_rasterizer_state;
+ svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state;
+ svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state;
+}
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
new file mode 100644
index 00000000000..460a101f8c0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -0,0 +1,245 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+#include "svga_debug.h"
+
+static INLINE unsigned
+translate_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return SVGA3D_TEX_ADDRESS_WRAP;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ return SVGA3D_TEX_ADDRESS_CLAMP;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ /* Unfortunately SVGA3D_TEX_ADDRESS_EDGE not respected by
+ * hardware.
+ */
+ return SVGA3D_TEX_ADDRESS_CLAMP;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return SVGA3D_TEX_ADDRESS_BORDER;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return SVGA3D_TEX_ADDRESS_MIRROR;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return SVGA3D_TEX_ADDRESS_MIRRORONCE;
+
+ default:
+ assert(0);
+ return SVGA3D_TEX_ADDRESS_WRAP;
+ }
+}
+
+static INLINE unsigned translate_img_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
+ case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR;
+ default:
+ assert(0);
+ return SVGA3D_TEX_FILTER_NEAREST;
+ }
+}
+
+static INLINE unsigned translate_mip_filter( unsigned filter )
+{
+ switch (filter) {
+ case PIPE_TEX_MIPFILTER_NONE: return SVGA3D_TEX_FILTER_NONE;
+ case PIPE_TEX_MIPFILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
+ case PIPE_TEX_MIPFILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR;
+ default:
+ assert(0);
+ return SVGA3D_TEX_FILTER_NONE;
+ }
+}
+
+static void *
+svga_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );
+ union util_color uc;
+
+ cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
+ cso->magfilter = translate_img_filter( sampler->mag_img_filter );
+ cso->minfilter = translate_img_filter( sampler->min_img_filter );
+ cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 );
+ if(cso->aniso_level != 1)
+ cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
+ cso->lod_bias = sampler->lod_bias;
+ cso->addressu = translate_wrap_mode(sampler->wrap_s);
+ cso->addressv = translate_wrap_mode(sampler->wrap_t);
+ cso->addressw = translate_wrap_mode(sampler->wrap_r);
+ cso->normalized_coords = sampler->normalized_coords;
+ cso->compare_mode = sampler->compare_mode;
+ cso->compare_func = sampler->compare_func;
+
+ {
+ ubyte r = float_to_ubyte(sampler->border_color[0]);
+ ubyte g = float_to_ubyte(sampler->border_color[1]);
+ ubyte b = float_to_ubyte(sampler->border_color[2]);
+ ubyte a = float_to_ubyte(sampler->border_color[3]);
+
+ util_pack_color_ub( r, g, b, a,
+ PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ cso->bordercolor = uc.ui;
+ }
+
+ /* No SVGA3D support for:
+ * - min/max LOD clamping
+ */
+ cso->min_lod = 0;
+ cso->view_min_lod = MAX2(sampler->min_lod, 0);
+ cso->view_max_lod = MAX2(sampler->max_lod, 0);
+
+ /* Use min_mipmap */
+ if (svga->debug.use_min_mipmap) {
+ if (cso->view_min_lod == cso->view_max_lod) {
+ cso->min_lod = cso->view_min_lod;
+ cso->view_min_lod = 0;
+ cso->view_max_lod = 1000; /* Just a high number */
+ cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
+ }
+ }
+
+ SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
+ cso->min_lod, cso->view_min_lod, cso->view_max_lod,
+ cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
+
+ return cso;
+}
+
+static void svga_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct svga_context *svga = svga_context(pipe);
+ unsigned i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == svga->curr.num_samplers &&
+ !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) {
+ debug_printf("sampler noop\n");
+ return;
+ }
+
+ for (i = 0; i < num; i++)
+ svga->curr.sampler[i] = sampler[i];
+
+ for (i = num; i < svga->curr.num_samplers; i++)
+ svga->curr.sampler[i] = NULL;
+
+ svga->curr.num_samplers = num;
+ svga->dirty |= SVGA_NEW_SAMPLER;
+}
+
+static void svga_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void svga_set_sampler_textures(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_texture **texture)
+{
+ struct svga_context *svga = svga_context(pipe);
+ unsigned flag_1d = 0;
+ unsigned flag_srgb = 0;
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == svga->curr.num_textures &&
+ !memcmp(svga->curr.texture, texture, num * sizeof(struct pipe_texture *))) {
+ if (0) debug_printf("texture noop\n");
+ return;
+ }
+
+ for (i = 0; i < num; i++) {
+ pipe_texture_reference(&svga->curr.texture[i],
+ texture[i]);
+
+ if (!texture[i])
+ continue;
+
+ if (texture[i]->format == PIPE_FORMAT_A8R8G8B8_SRGB)
+ flag_srgb |= 1 << i;
+
+ if (texture[i]->target == PIPE_TEXTURE_1D)
+ flag_1d |= 1 << i;
+ }
+
+ for (i = num; i < svga->curr.num_textures; i++)
+ pipe_texture_reference(&svga->curr.texture[i],
+ NULL);
+
+ svga->curr.num_textures = num;
+ svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
+
+ if (flag_srgb != svga->curr.tex_flags.flag_srgb ||
+ flag_1d != svga->curr.tex_flags.flag_1d)
+ {
+ svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
+ svga->curr.tex_flags.flag_1d = flag_1d;
+ svga->curr.tex_flags.flag_srgb = flag_srgb;
+ }
+}
+
+
+
+void svga_init_sampler_functions( struct svga_context *svga )
+{
+ svga->pipe.create_sampler_state = svga_create_sampler_state;
+ svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states;
+ svga->pipe.delete_sampler_state = svga_delete_sampler_state;
+ svga->pipe.set_fragment_sampler_textures = svga_set_sampler_textures;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
new file mode 100644
index 00000000000..42f290d162a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -0,0 +1,102 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+static void svga_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct svga_context *svga = svga_context(pipe);
+ unsigned i;
+ boolean any_user_buffer = FALSE;
+
+ /* Check for no change */
+ if (count == svga->curr.num_vertex_buffers &&
+ memcmp(svga->curr.vb, buffers, count * sizeof buffers[0]) == 0)
+ return;
+
+ /* Adjust refcounts */
+ for (i = 0; i < count; i++) {
+ pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer);
+ if (svga_buffer(buffers[i].buffer)->user)
+ any_user_buffer = TRUE;
+ }
+
+ for ( ; i < svga->curr.num_vertex_buffers; i++)
+ pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL);
+
+ /* Copy remaining data */
+ memcpy(svga->curr.vb, buffers, count * sizeof buffers[0]);
+ svga->curr.num_vertex_buffers = count;
+ svga->curr.any_user_vertex_buffers = any_user_buffer;
+
+ svga->dirty |= SVGA_NEW_VBUFFER;
+}
+
+static void svga_set_vertex_elements(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *elements)
+{
+ struct svga_context *svga = svga_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < count; i++)
+ svga->curr.ve[i] = elements[i];
+
+ svga->curr.num_vertex_elements = count;
+ svga->dirty |= SVGA_NEW_VELEMENT;
+}
+
+
+void svga_cleanup_vertex_state( struct svga_context *svga )
+{
+ unsigned i;
+
+ for (i = 0 ; i < svga->curr.num_vertex_buffers; i++)
+ pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL);
+}
+
+
+void svga_init_vertex_functions( struct svga_context *svga )
+{
+ svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
+ svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
new file mode 100644
index 00000000000..7e6ab576add
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -0,0 +1,199 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+
+static const struct tgsi_token *substitute_vs(
+ unsigned shader_id,
+ const struct tgsi_token *old_tokens )
+{
+#if 0
+ if (shader_id == 12) {
+ static struct tgsi_token tokens[300];
+
+ const char *text =
+ "VERT\n"
+ "DCL IN[0]\n"
+ "DCL IN[1]\n"
+ "DCL IN[2]\n"
+ "DCL OUT[0], POSITION\n"
+ "DCL TEMP[0..4]\n"
+ "IMM FLT32 { 1.0000, 1.0000, 1.0000, 1.0000 }\n"
+ "IMM FLT32 { 0.45, 1.0000, 1.0000, 1.0000 }\n"
+ "IMM FLT32 { 1.297863, 0.039245, 0.035993, 0.035976}\n"
+ "IMM FLT32 { -0.019398, 1.696131, -0.202151, -0.202050 }\n"
+ "IMM FLT32 { 0.051711, -0.348713, -0.979204, -0.978714 }\n"
+ "IMM FLT32 { 0.000000, 0.000003, 139.491577, 141.421356 }\n"
+ "DCL CONST[0..7]\n"
+ "DCL CONST[9..16]\n"
+ " MOV TEMP[2], IMM[0]\n"
+
+ " MOV TEMP[2].xyz, IN[2]\n"
+ " MOV TEMP[2].xyz, IN[0]\n"
+ " MOV TEMP[2].xyz, IN[1]\n"
+
+ " MUL TEMP[1], IMM[3], TEMP[2].yyyy\n"
+ " MAD TEMP[3], IMM[2], TEMP[2].xxxx, TEMP[1]\n"
+ " MAD TEMP[1], IMM[4], TEMP[2].zzzz, TEMP[3]\n"
+ " MAD TEMP[4], IMM[5], TEMP[2].wwww, TEMP[1]\n"
+
+ " MOV OUT[0], TEMP[4]\n"
+ " END\n";
+
+ if (!tgsi_text_translate( text,
+ tokens,
+ Elements(tokens) ))
+ {
+ assert(0);
+ return NULL;
+ }
+
+ return tokens;
+ }
+#endif
+
+ return old_tokens;
+}
+
+
+/***********************************************************************
+ * Vertex shaders
+ */
+
+static void *
+svga_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+ if (!vs)
+ return NULL;
+
+ /* substitute a debug shader?
+ */
+ vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id,
+ templ->tokens));
+
+
+ /* Collect basic info that we'll need later:
+ */
+ tgsi_scan_shader(vs->base.tokens, &vs->base.info);
+
+ {
+ /* Need to do construct a new template in case we substitued a
+ * debug shader.
+ */
+ struct pipe_shader_state tmp2 = *templ;
+ tmp2.tokens = vs->base.tokens;
+ vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2);
+ }
+
+ vs->base.id = svga->debug.shader_id++;
+ vs->base.use_sm30 = svgascreen->use_vs30;
+
+ if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+ debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+ __FUNCTION__, vs->base.id,
+ vs->base.info.num_inputs, vs->base.info.num_outputs);
+ }
+
+ return vs;
+}
+
+static void svga_bind_vs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+ struct svga_context *svga = svga_context(pipe);
+
+ svga->curr.vs = vs;
+ svga->dirty |= SVGA_NEW_VS;
+}
+
+
+static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+ struct svga_shader_result *result, *tmp;
+ enum pipe_error ret;
+
+ svga_hwtnl_flush_retry( svga );
+
+ draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
+
+ for (result = vs->base.results; result; result = tmp ) {
+ tmp = result->next;
+
+ ret = SVGA3D_DestroyShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_VS );
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_DestroyShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_VS );
+ assert(ret == PIPE_OK);
+ }
+
+ util_bitmask_clear( svga->vs_bm, result->id );
+
+ svga_destroy_shader_result( result );
+
+ /*
+ * Remove stale references to this result to ensure a new result on the
+ * same address will be detected as a change.
+ */
+ if(result == svga->state.hw_draw.vs)
+ svga->state.hw_draw.vs = NULL;
+ }
+
+ FREE((void *)vs->base.tokens);
+ FREE(vs);
+}
+
+
+void svga_init_vs_functions( struct svga_context *svga )
+{
+ svga->pipe.create_vs_state = svga_create_vs_state;
+ svga->pipe.bind_vs_state = svga_bind_vs_state;
+ svga->pipe.delete_vs_state = svga_delete_vs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
new file mode 100644
index 00000000000..fc1b3c980ef
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -0,0 +1,440 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "util/u_string.h"
+#include "util/u_math.h"
+
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+#ifdef DEBUG
+int SVGA_DEBUG = 0;
+
+static const struct debug_named_value svga_debug_flags[] = {
+ { "dma", DEBUG_DMA },
+ { "tgsi", DEBUG_TGSI },
+ { "pipe", DEBUG_PIPE },
+ { "state", DEBUG_STATE },
+ { "screen", DEBUG_SCREEN },
+ { "tex", DEBUG_TEX },
+ { "swtnl", DEBUG_SWTNL },
+ { "const", DEBUG_CONSTS },
+ { "viewport", DEBUG_VIEWPORT },
+ { "views", DEBUG_VIEWS },
+ { "perf", DEBUG_PERF },
+ { "flush", DEBUG_FLUSH },
+ { "sync", DEBUG_SYNC },
+ { "cache", DEBUG_CACHE },
+ {NULL, 0}
+};
+#endif
+
+static const char *
+svga_get_vendor( struct pipe_screen *pscreen )
+{
+ return "VMware, Inc.";
+}
+
+
+static const char *
+svga_get_name( struct pipe_screen *pscreen )
+{
+#ifdef DEBUG
+ /* Only return internal details in the DEBUG version:
+ */
+ return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC;
+#else
+ return "SVGA3D; build: RELEASE; ";
+#endif
+}
+
+
+
+
+static float
+svga_get_paramf(struct pipe_screen *screen, int param)
+{
+ struct svga_screen *svgascreen = svga_screen(screen);
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ SVGA3dDevCapResult result;
+
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 7.0;
+
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ /* Keep this to a reasonable size to avoid failures in
+ * conform/pntaa.c:
+ */
+ return 80.0;
+
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 4.0;
+
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0;
+
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return 16;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return svgascreen->use_ps30 && svgascreen->use_vs30;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 1;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_RENDER_TARGETS, &result))
+ return 1;
+ if(!result.u)
+ return 1;
+ return MIN2(result.u, PIPE_MAX_COLOR_BUFS);
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return SVGA_MAX_TEXTURE_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 8; /* max 128x128x128 */
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return SVGA_MAX_TEXTURE_LEVELS;
+
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT: /* req. for GL 1.4 */
+ return 1;
+
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+
+/* This is a fairly pointless interface
+ */
+static int
+svga_get_param(struct pipe_screen *screen, int param)
+{
+ return (int) svga_get_paramf( screen, param );
+}
+
+
+static INLINE SVGA3dDevCapIndex
+svga_translate_format_cap(enum pipe_format format)
+{
+ switch(format) {
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8;
+
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4;
+
+ case PIPE_FORMAT_Z16_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_Z_D16;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8;
+
+ case PIPE_FORMAT_A8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_ALPHA8;
+ case PIPE_FORMAT_L8_UNORM:
+ return SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8;
+
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ return SVGA3D_DEVCAP_SURFACEFMT_DXT1;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return SVGA3D_DEVCAP_SURFACEFMT_DXT3;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return SVGA3D_DEVCAP_SURFACEFMT_DXT5;
+
+ default:
+ return SVGA3D_DEVCAP_MAX;
+ }
+}
+
+
+static boolean
+svga_is_format_supported( struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned tex_usage,
+ unsigned geom_flags )
+{
+ struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+ SVGA3dDevCapIndex index;
+ SVGA3dDevCapResult result;
+
+ assert(tex_usage);
+
+ /* Override host capabilities */
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ switch(format) {
+
+ /* Often unsupported/problematic. This means we end up with the same
+ * visuals for all virtual hardware implementations.
+ */
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return FALSE;
+
+ /* Simulate ability to render into compressed textures */
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return TRUE;
+
+ default:
+ break;
+ }
+ }
+
+ /* Try to query the host */
+ index = svga_translate_format_cap(format);
+ if( index < SVGA3D_DEVCAP_MAX &&
+ sws->get_cap(sws, index, &result) )
+ {
+ SVGA3dSurfaceFormatCaps mask;
+
+ mask.value = 0;
+ if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+ mask.offscreenRenderTarget = 1;
+ if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+ mask.zStencil = 1;
+ if (tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+ mask.texture = 1;
+
+ if ((result.u & mask.value) == mask.value)
+ return TRUE;
+ else
+ return FALSE;
+ }
+
+ /* Use our translate functions directly rather than relying on a
+ * duplicated list of supported formats which is prone to getting
+ * out of sync:
+ */
+ if(tex_usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL))
+ return svga_translate_format_render(format) != SVGA3D_FORMAT_INVALID;
+ else
+ return svga_translate_format(format) != SVGA3D_FORMAT_INVALID;
+}
+
+
+static void
+svga_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+ sws->fence_reference(sws, ptr, fence);
+}
+
+
+static int
+svga_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+ return sws->fence_signalled(sws, fence, flag);
+}
+
+
+static int
+svga_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
+ __FUNCTION__, fence);
+
+ return sws->fence_finish(sws, fence, flag);
+}
+
+
+static void
+svga_destroy_screen( struct pipe_screen *screen )
+{
+ struct svga_screen *svgascreen = svga_screen(screen);
+
+ svga_screen_cache_cleanup(svgascreen);
+
+ pipe_mutex_destroy(svgascreen->swc_mutex);
+ pipe_mutex_destroy(svgascreen->tex_mutex);
+
+ svgascreen->swc->destroy(svgascreen->swc);
+
+ svgascreen->sws->destroy(svgascreen->sws);
+
+ FREE(svgascreen);
+}
+
+
+/**
+ * Create a new svga_screen object
+ */
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws)
+{
+ struct svga_screen *svgascreen;
+ struct pipe_screen *screen;
+ SVGA3dDevCapResult result;
+
+#ifdef DEBUG
+ SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
+#endif
+
+ svgascreen = CALLOC_STRUCT(svga_screen);
+ if (!svgascreen)
+ goto error1;
+
+ svgascreen->debug.force_level_surface_view =
+ debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE);
+ svgascreen->debug.force_surface_view =
+ debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE);
+ svgascreen->debug.force_sampler_view =
+ debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE);
+ svgascreen->debug.no_surface_view =
+ debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE);
+ svgascreen->debug.no_sampler_view =
+ debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE);
+
+ screen = &svgascreen->screen;
+
+ screen->destroy = svga_destroy_screen;
+ screen->get_name = svga_get_name;
+ screen->get_vendor = svga_get_vendor;
+ screen->get_param = svga_get_param;
+ screen->get_paramf = svga_get_paramf;
+ screen->is_format_supported = svga_is_format_supported;
+ screen->fence_reference = svga_fence_reference;
+ screen->fence_signalled = svga_fence_signalled;
+ screen->fence_finish = svga_fence_finish;
+ svgascreen->sws = sws;
+
+ svga_screen_init_texture_functions(screen);
+ svga_screen_init_buffer_functions(screen);
+
+ svgascreen->use_ps30 =
+ sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
+ result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
+
+ svgascreen->use_vs30 =
+ sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
+ result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
+
+#if 1
+ /* Shader model 2.0 is unsupported at the moment. */
+ if(!svgascreen->use_ps30 || !svgascreen->use_vs30)
+ goto error2;
+#else
+ if(debug_get_bool_option("SVGA_NO_SM30", FALSE))
+ svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE;
+#endif
+
+ svgascreen->swc = sws->context_create(sws);
+ if(!svgascreen->swc)
+ goto error2;
+
+ pipe_mutex_init(svgascreen->tex_mutex);
+ pipe_mutex_init(svgascreen->swc_mutex);
+
+ LIST_INITHEAD(&svgascreen->cached_buffers);
+
+ svga_screen_cache_init(svgascreen);
+
+ return screen;
+error2:
+ FREE(svgascreen);
+error1:
+ return NULL;
+}
+
+void svga_screen_flush( struct svga_screen *svgascreen,
+ struct pipe_fence_handle **pfence )
+{
+ struct pipe_fence_handle *fence = NULL;
+
+ SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__);
+
+ pipe_mutex_lock(svgascreen->swc_mutex);
+ svgascreen->swc->flush(svgascreen->swc, &fence);
+ pipe_mutex_unlock(svgascreen->swc_mutex);
+
+ svga_screen_cache_flush(svgascreen, fence);
+
+ if(pfence)
+ *pfence = fence;
+ else
+ svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL);
+}
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen)
+{
+ return svga_screen(screen)->sws;
+}
+
+#ifdef DEBUG
+struct svga_screen *
+svga_screen(struct pipe_screen *screen)
+{
+ assert(screen);
+ assert(screen->destroy == svga_destroy_screen);
+ return (struct svga_screen *)screen;
+}
+#endif
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
new file mode 100644
index 00000000000..b94ca7fc1ca
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_H
+#define SVGA_SCREEN_H
+
+
+#include "pipe/p_screen.h"
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_context;
+struct SVGACmdMemory;
+
+#define SVGA_COMBINE_USERBUFFERS 1
+
+/**
+ * Subclass of pipe_screen
+ */
+struct svga_screen
+{
+ struct pipe_screen screen;
+ struct svga_winsys_screen *sws;
+
+ unsigned use_ps30;
+ unsigned use_vs30;
+
+ struct {
+ boolean force_level_surface_view;
+ boolean force_surface_view;
+ boolean no_surface_view;
+ boolean force_sampler_view;
+ boolean no_sampler_view;
+ } debug;
+
+ /* The screen needs its own context */
+ struct svga_winsys_context *swc;
+ struct SVGACmdMemory *fifo;
+
+ unsigned texture_timestamp;
+ pipe_mutex tex_mutex;
+ pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */
+
+ /**
+ * List of buffers with cached GMR. Ordered from the most recently used to
+ * the least recently used
+ */
+ struct list_head cached_buffers;
+
+ struct svga_host_surface_cache cache;
+};
+
+#ifndef DEBUG
+/** cast wrapper */
+static INLINE struct svga_screen *
+svga_screen(struct pipe_screen *pscreen)
+{
+ return (struct svga_screen *) pscreen;
+}
+#else
+struct svga_screen *
+svga_screen(struct pipe_screen *screen);
+#endif
+
+void svga_screen_flush( struct svga_screen *svga_screen,
+ struct pipe_fence_handle **pfence );
+
+#endif /* SVGA_SCREEN_H */
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
new file mode 100644
index 00000000000..58a1aba464b
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -0,0 +1,825 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+
+
+/**
+ * Vertex and index buffers have to be treated slightly differently from
+ * regular guest memory regions because the SVGA device sees them as
+ * surfaces, and the state tracker can create/destroy without the pipe
+ * driver, therefore we must do the uploads from the vws.
+ */
+static INLINE boolean
+svga_buffer_needs_hw_storage(unsigned usage)
+{
+ return usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX);
+}
+
+
+static INLINE enum pipe_error
+svga_buffer_create_host_surface(struct svga_screen *ss,
+ struct svga_buffer *sbuf)
+{
+ if(!sbuf->handle) {
+ sbuf->key.flags = 0;
+
+ sbuf->key.format = SVGA3D_BUFFER;
+ if(sbuf->base.usage & PIPE_BUFFER_USAGE_VERTEX)
+ sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
+ if(sbuf->base.usage & PIPE_BUFFER_USAGE_INDEX)
+ sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+
+ sbuf->key.size.width = sbuf->base.size;
+ sbuf->key.size.height = 1;
+ sbuf->key.size.depth = 1;
+
+ sbuf->key.numFaces = 1;
+ sbuf->key.numMipLevels = 1;
+ sbuf->key.cachable = 1;
+
+ SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->base.size);
+
+ sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+ if(!sbuf->handle)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /* Always set the discard flag on the first time the buffer is written
+ * as svga_screen_surface_create might have passed a recycled host
+ * buffer.
+ */
+ sbuf->hw.flags.discard = TRUE;
+
+ SVGA_DBG(DEBUG_DMA, " --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->base.size);
+ }
+
+ return PIPE_OK;
+}
+
+
+static INLINE void
+svga_buffer_destroy_host_surface(struct svga_screen *ss,
+ struct svga_buffer *sbuf)
+{
+ if(sbuf->handle) {
+ SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+ svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+ }
+}
+
+
+static INLINE void
+svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+ struct svga_winsys_screen *sws = ss->sws;
+
+ assert(!sbuf->map.count);
+ assert(sbuf->hw.buf);
+ if(sbuf->hw.buf) {
+ sws->buffer_destroy(sws, sbuf->hw.buf);
+ sbuf->hw.buf = NULL;
+ assert(sbuf->head.prev && sbuf->head.next);
+ LIST_DEL(&sbuf->head);
+#ifdef DEBUG
+ sbuf->head.next = sbuf->head.prev = NULL;
+#endif
+ }
+}
+
+static INLINE enum pipe_error
+svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+ if (sbuf->hw.buf && sbuf->hw.num_ranges) {
+ void *src;
+
+ if (!sbuf->swbuf)
+ sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment);
+ if (!sbuf->swbuf)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ if (!src)
+ return PIPE_ERROR;
+
+ memcpy(sbuf->swbuf, src, sbuf->base.size);
+ ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+ }
+
+ return PIPE_OK;
+}
+
+/**
+ * Try to make GMR space available by freeing the hardware storage of
+ * unmapped
+ */
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss)
+{
+ struct list_head *curr;
+ struct svga_buffer *sbuf;
+ enum pipe_error ret = PIPE_OK;
+
+ curr = ss->cached_buffers.prev;
+
+ /* free the least recently used buffer's hw storage which is not mapped */
+ do {
+ if(curr == &ss->cached_buffers)
+ return FALSE;
+
+ sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+
+ curr = curr->prev;
+ if (sbuf->map.count == 0)
+ ret = svga_buffer_backup(ss, sbuf);
+
+ } while(sbuf->map.count != 0 || ret != PIPE_OK);
+
+ svga_buffer_destroy_hw_storage(ss, sbuf);
+
+ return TRUE;
+}
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create( struct svga_screen *ss,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size )
+{
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_winsys_buffer *buf;
+
+ /* Just try */
+ buf = sws->buffer_create(sws, alignment, usage, size);
+ if(!buf) {
+
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n",
+ size);
+
+ /* Try flushing all pending DMAs */
+ svga_screen_flush(ss, NULL);
+ buf = sws->buffer_create(sws, alignment, usage, size);
+
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n",
+ size);
+
+ /* Try evicing all buffer storage */
+ while(!buf && svga_buffer_free_cached_hw_storage(ss))
+ buf = sws->buffer_create(sws, alignment, usage, size);
+ }
+
+ return buf;
+}
+
+
+/**
+ * Allocate DMA'ble storage for the buffer.
+ *
+ * Called before mapping a buffer.
+ */
+static INLINE enum pipe_error
+svga_buffer_create_hw_storage(struct svga_screen *ss,
+ struct svga_buffer *sbuf)
+{
+ if(!sbuf->hw.buf) {
+ unsigned alignment = sbuf->base.alignment;
+ unsigned usage = 0;
+ unsigned size = sbuf->base.size;
+
+ sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size);
+ if(!sbuf->hw.buf)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ assert(!sbuf->needs_flush);
+ assert(!sbuf->head.prev && !sbuf->head.next);
+ LIST_ADD(&sbuf->head, &ss->cached_buffers);
+ }
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank.
+ */
+static enum pipe_error
+svga_buffer_upload_command(struct svga_context *svga,
+ struct svga_buffer *sbuf)
+{
+ struct svga_winsys_context *swc = svga->swc;
+ struct svga_winsys_buffer *guest = sbuf->hw.buf;
+ struct svga_winsys_surface *host = sbuf->handle;
+ SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM;
+ SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags;
+ SVGA3dCmdSurfaceDMA *cmd;
+ uint32 numBoxes = sbuf->hw.num_ranges;
+ SVGA3dCopyBox *boxes;
+ SVGA3dCmdSurfaceDMASuffix *pSuffix;
+ unsigned region_flags;
+ unsigned surface_flags;
+ struct pipe_buffer *dummy;
+
+ if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ }
+ else if(transfer == SVGA3D_READ_HOST_VRAM) {
+ region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+ surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+ }
+ else {
+ assert(0);
+ return PIPE_ERROR_BAD_INPUT;
+ }
+
+ assert(numBoxes);
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_SURFACE_DMA,
+ sizeof *cmd + numBoxes * sizeof *boxes + sizeof *pSuffix,
+ 2);
+ if(!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
+ cmd->guest.pitch = 0;
+
+ swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+ cmd->host.face = 0;
+ cmd->host.mipmap = 0;
+
+ cmd->transfer = transfer;
+
+ sbuf->hw.boxes = (SVGA3dCopyBox *)&cmd[1];
+ sbuf->hw.svga = svga;
+
+ /* Increment reference count */
+ dummy = NULL;
+ pipe_buffer_reference(&dummy, &sbuf->base);
+
+ pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes);
+ pSuffix->suffixSize = sizeof *pSuffix;
+ pSuffix->maximumOffset = sbuf->base.size;
+ pSuffix->flags = flags;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Patch up the upload DMA command reserved by svga_buffer_upload_command
+ * with the final ranges.
+ */
+static void
+svga_buffer_upload_flush(struct svga_context *svga,
+ struct svga_buffer *sbuf)
+{
+ struct svga_screen *ss = svga_screen(svga->pipe.screen);
+ SVGA3dCopyBox *boxes;
+ unsigned i;
+
+ assert(sbuf->handle);
+ assert(sbuf->hw.buf);
+ assert(sbuf->hw.num_ranges);
+ assert(sbuf->hw.svga == svga);
+ assert(sbuf->hw.boxes);
+
+ /*
+ * Patch the DMA command with the final copy box.
+ */
+
+ SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);
+
+ boxes = sbuf->hw.boxes;
+ for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+ SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n",
+ sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end);
+
+ boxes[i].x = sbuf->hw.ranges[i].start;
+ boxes[i].y = 0;
+ boxes[i].z = 0;
+ boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start;
+ boxes[i].h = 1;
+ boxes[i].d = 1;
+ boxes[i].srcx = sbuf->hw.ranges[i].start;
+ boxes[i].srcy = 0;
+ boxes[i].srcz = 0;
+ }
+
+ sbuf->hw.num_ranges = 0;
+ memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags);
+
+ assert(sbuf->head.prev && sbuf->head.next);
+ LIST_DEL(&sbuf->head);
+ sbuf->needs_flush = FALSE;
+ /* XXX: do we care about cached_buffers any more ?*/
+ LIST_ADD(&sbuf->head, &ss->cached_buffers);
+
+ sbuf->hw.svga = NULL;
+ sbuf->hw.boxes = NULL;
+
+ /* Decrement reference count */
+ pipe_reference(&(sbuf->base.reference), NULL);
+ sbuf = NULL;
+}
+
+
+/**
+ * Queue a DMA upload of a range of this buffer to the host.
+ *
+ * This function only notes the range down. It doesn't actually emit a DMA
+ * upload command. That only happens when a context tries to refer to this
+ * buffer, and the DMA upload command is added to that context's command buffer.
+ *
+ * We try to lump as many contiguous DMA transfers together as possible.
+ */
+static void
+svga_buffer_upload_queue(struct svga_buffer *sbuf,
+ unsigned start,
+ unsigned end)
+{
+ unsigned i;
+
+ assert(sbuf->hw.buf);
+ assert(end > start);
+
+ /*
+ * Try to grow one of the ranges.
+ *
+ * Note that it is not this function task to care about overlapping ranges,
+ * as the GMR was already given so it is too late to do anything. Situations
+ * where overlapping ranges may pose a problem should be detected via
+ * pipe_context::is_buffer_referenced and the context that refers to the
+ * buffer should be flushed.
+ */
+
+ for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+ if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) {
+ sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start);
+ sbuf->hw.ranges[i].end = MAX2(sbuf->hw.ranges[i].end, end);
+ return;
+ }
+ }
+
+ /*
+ * We cannot add a new range to an existing DMA command, so patch-up the
+ * pending DMA upload and start clean.
+ */
+
+ if(sbuf->needs_flush)
+ svga_buffer_upload_flush(sbuf->hw.svga, sbuf);
+
+ assert(!sbuf->needs_flush);
+ assert(!sbuf->hw.svga);
+ assert(!sbuf->hw.boxes);
+
+ /*
+ * Add a new range.
+ */
+
+ sbuf->hw.ranges[sbuf->hw.num_ranges].start = start;
+ sbuf->hw.ranges[sbuf->hw.num_ranges].end = end;
+ ++sbuf->hw.num_ranges;
+}
+
+
+static void *
+svga_buffer_map_range( struct pipe_screen *screen,
+ struct pipe_buffer *buf,
+ unsigned offset, unsigned length,
+ unsigned usage )
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_buffer *sbuf = svga_buffer( buf );
+ void *map;
+
+ if(sbuf->swbuf) {
+ /* User/malloc buffer */
+ map = sbuf->swbuf;
+ }
+ else {
+ if(!sbuf->hw.buf) {
+ struct svga_winsys_surface *handle = sbuf->handle;
+
+ if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK)
+ return NULL;
+
+ /* Populate the hardware storage if the host surface pre-existed */
+ if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) {
+ SVGA3dSurfaceDMAFlags flags;
+ enum pipe_error ret;
+ struct pipe_fence_handle *fence = NULL;
+
+ SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n",
+ sbuf->handle, 0, sbuf->base.size);
+
+ memset(&flags, 0, sizeof flags);
+
+ ret = SVGA3D_BufferDMA(ss->swc,
+ sbuf->hw.buf,
+ sbuf->handle,
+ SVGA3D_READ_HOST_VRAM,
+ sbuf->base.size,
+ 0,
+ flags);
+ if(ret != PIPE_OK) {
+ ss->swc->flush(ss->swc, NULL);
+
+ ret = SVGA3D_BufferDMA(ss->swc,
+ sbuf->hw.buf,
+ sbuf->handle,
+ SVGA3D_READ_HOST_VRAM,
+ sbuf->base.size,
+ 0,
+ flags);
+ assert(ret == PIPE_OK);
+ }
+
+ ss->swc->flush(ss->swc, &fence);
+ sws->fence_finish(sws, fence, 0);
+ sws->fence_reference(sws, &fence, NULL);
+ }
+ }
+ else {
+ if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) {
+ /* We already had the hardware storage but we would have to issue
+ * a download if we hadn't, so move the buffer to the begginning
+ * of the LRU list.
+ */
+ assert(sbuf->head.prev && sbuf->head.next);
+ LIST_DEL(&sbuf->head);
+ LIST_ADD(&sbuf->head, &ss->cached_buffers);
+ }
+ }
+
+ map = sws->buffer_map(sws, sbuf->hw.buf, usage);
+ }
+
+ if(map) {
+ pipe_mutex_lock(ss->swc_mutex);
+
+ ++sbuf->map.count;
+
+ if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
+ assert(sbuf->map.count <= 1);
+ sbuf->map.writing = TRUE;
+ if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT)
+ sbuf->map.flush_explicit = TRUE;
+ }
+
+ pipe_mutex_unlock(ss->swc_mutex);
+ }
+
+ return map;
+}
+
+static void
+svga_buffer_flush_mapped_range( struct pipe_screen *screen,
+ struct pipe_buffer *buf,
+ unsigned offset, unsigned length)
+{
+ struct svga_buffer *sbuf = svga_buffer( buf );
+ struct svga_screen *ss = svga_screen(screen);
+
+ pipe_mutex_lock(ss->swc_mutex);
+ assert(sbuf->map.writing);
+ if(sbuf->map.writing) {
+ assert(sbuf->map.flush_explicit);
+ if(sbuf->hw.buf)
+ svga_buffer_upload_queue(sbuf, offset, offset + length);
+ }
+ pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void
+svga_buffer_unmap( struct pipe_screen *screen,
+ struct pipe_buffer *buf)
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_buffer *sbuf = svga_buffer( buf );
+
+ pipe_mutex_lock(ss->swc_mutex);
+
+ assert(sbuf->map.count);
+ if(sbuf->map.count)
+ --sbuf->map.count;
+
+ if(sbuf->hw.buf)
+ sws->buffer_unmap(sws, sbuf->hw.buf);
+
+ if(sbuf->map.writing) {
+ if(!sbuf->map.flush_explicit) {
+ /* No mapped range was flushed -- flush the whole buffer */
+ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
+
+ if(sbuf->hw.buf)
+ svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+ }
+
+ sbuf->map.writing = FALSE;
+ sbuf->map.flush_explicit = FALSE;
+ }
+
+ pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void
+svga_buffer_destroy( struct pipe_buffer *buf )
+{
+ struct svga_screen *ss = svga_screen(buf->screen);
+ struct svga_buffer *sbuf = svga_buffer( buf );
+
+ assert(!p_atomic_read(&buf->reference.count));
+
+ assert(!sbuf->needs_flush);
+
+ if(sbuf->handle) {
+ SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+ svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+ }
+
+ if(sbuf->hw.buf)
+ svga_buffer_destroy_hw_storage(ss, sbuf);
+
+ if(sbuf->swbuf && !sbuf->user)
+ align_free(sbuf->swbuf);
+
+ FREE(sbuf);
+}
+
+static struct pipe_buffer *
+svga_buffer_create(struct pipe_screen *screen,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size)
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_buffer *sbuf;
+
+ sbuf = CALLOC_STRUCT(svga_buffer);
+ if(!sbuf)
+ goto error1;
+
+ sbuf->magic = SVGA_BUFFER_MAGIC;
+
+ pipe_reference_init(&sbuf->base.reference, 1);
+ sbuf->base.screen = screen;
+ sbuf->base.alignment = alignment;
+ sbuf->base.usage = usage;
+ sbuf->base.size = size;
+
+ if(svga_buffer_needs_hw_storage(usage)) {
+ if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
+ goto error2;
+ }
+ else {
+ if(alignment < sizeof(void*))
+ alignment = sizeof(void*);
+
+ usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+
+ sbuf->swbuf = align_malloc(size, alignment);
+ if(!sbuf->swbuf)
+ goto error2;
+ }
+
+ return &sbuf->base;
+
+error2:
+ FREE(sbuf);
+error1:
+ return NULL;
+}
+
+static struct pipe_buffer *
+svga_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes)
+{
+ struct svga_buffer *sbuf;
+
+ sbuf = CALLOC_STRUCT(svga_buffer);
+ if(!sbuf)
+ goto no_sbuf;
+
+ sbuf->magic = SVGA_BUFFER_MAGIC;
+
+ sbuf->swbuf = ptr;
+ sbuf->user = TRUE;
+
+ pipe_reference_init(&sbuf->base.reference, 1);
+ sbuf->base.screen = screen;
+ sbuf->base.alignment = 1;
+ sbuf->base.usage = 0;
+ sbuf->base.size = bytes;
+
+ return &sbuf->base;
+
+no_sbuf:
+ return NULL;
+}
+
+
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen)
+{
+ screen->buffer_create = svga_buffer_create;
+ screen->user_buffer_create = svga_user_buffer_create;
+ screen->buffer_map_range = svga_buffer_map_range;
+ screen->buffer_flush_mapped_range = svga_buffer_flush_mapped_range;
+ screen->buffer_unmap = svga_buffer_unmap;
+ screen->buffer_destroy = svga_buffer_destroy;
+}
+
+
+/**
+ * Copy the contents of the user buffer / malloc buffer to a hardware buffer.
+ */
+static INLINE enum pipe_error
+svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+ if(!sbuf->hw.buf) {
+ enum pipe_error ret;
+ void *map;
+
+ assert(sbuf->swbuf);
+ if(!sbuf->swbuf)
+ return PIPE_ERROR;
+
+ ret = svga_buffer_create_hw_storage(ss, sbuf);
+ assert(ret == PIPE_OK);
+ if(ret != PIPE_OK)
+ return ret;
+
+ pipe_mutex_lock(ss->swc_mutex);
+ map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE);
+ assert(map);
+ if(!map) {
+ pipe_mutex_unlock(ss->swc_mutex);
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+
+ memcpy(map, sbuf->swbuf, sbuf->base.size);
+ ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+
+ /* This user/malloc buffer is now indistinguishable from a gpu buffer */
+ assert(!sbuf->map.count);
+ if(!sbuf->map.count) {
+ if(sbuf->user)
+ sbuf->user = FALSE;
+ else
+ align_free(sbuf->swbuf);
+ sbuf->swbuf = NULL;
+ }
+
+ svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+ }
+
+ pipe_mutex_unlock(ss->swc_mutex);
+ return PIPE_OK;
+}
+
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+ struct pipe_buffer *buf)
+{
+ struct pipe_screen *screen = svga->pipe.screen;
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_buffer *sbuf;
+ enum pipe_error ret;
+
+ if(!buf)
+ return NULL;
+
+ sbuf = svga_buffer(buf);
+
+ assert(!sbuf->map.count);
+
+ if(!sbuf->handle) {
+ ret = svga_buffer_create_host_surface(ss, sbuf);
+ if(ret != PIPE_OK)
+ return NULL;
+
+ ret = svga_buffer_update_hw(ss, sbuf);
+ if(ret != PIPE_OK)
+ return NULL;
+ }
+
+ if(!sbuf->needs_flush && sbuf->hw.num_ranges) {
+ /* Queue the buffer for flushing */
+ ret = svga_buffer_upload_command(svga, sbuf);
+ if(ret != PIPE_OK)
+ /* XXX: Should probably have a richer return value */
+ return NULL;
+
+ assert(sbuf->hw.svga == svga);
+
+ sbuf->needs_flush = TRUE;
+ assert(sbuf->head.prev && sbuf->head.next);
+ LIST_DEL(&sbuf->head);
+ LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers);
+ }
+
+ return sbuf->handle;
+}
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+ enum SVGA3dSurfaceFormat format,
+ struct svga_winsys_surface *srf)
+{
+ struct pipe_buffer *buf;
+ struct svga_buffer *sbuf;
+ struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+
+ buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0);
+ if (!buf)
+ return NULL;
+
+ sbuf = svga_buffer(buf);
+
+ /*
+ * We are not the creator of this surface and therefore we must not
+ * cache it for reuse. Set the cacheable flag to zero in the key to
+ * prevent this.
+ */
+ sbuf->key.format = format;
+ sbuf->key.cachable = 0;
+ sws->surface_reference(sws, &sbuf->handle, srf);
+
+ return buf;
+}
+
+
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer)
+{
+ struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen);
+ struct svga_winsys_surface *vsurf = NULL;
+
+ assert(svga_buffer(buffer)->key.cachable == 0);
+ svga_buffer(buffer)->key.cachable = 0;
+ sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle);
+ return vsurf;
+}
+
+void
+svga_context_flush_buffers(struct svga_context *svga)
+{
+ struct list_head *curr, *next;
+ struct svga_buffer *sbuf;
+
+ curr = svga->dirty_buffers.next;
+ next = curr->next;
+ while(curr != &svga->dirty_buffers) {
+ sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+
+ assert(p_atomic_read(&sbuf->base.reference.count) != 0);
+ assert(sbuf->needs_flush);
+
+ svga_buffer_upload_flush(svga, sbuf);
+
+ curr = next;
+ next = curr->next;
+ }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h
new file mode 100644
index 00000000000..5d7af5a7c50
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.h
@@ -0,0 +1,190 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_BUFFER_H
+#define SVGA_BUFFER_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+#define SVGA_BUFFER_MAGIC 0x344f9005
+
+/**
+ * Maximum number of discontiguous ranges
+ */
+#define SVGA_BUFFER_MAX_RANGES 32
+
+
+struct svga_screen;
+struct svga_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+struct svga_buffer_range
+{
+ unsigned start;
+ unsigned end;
+};
+
+
+/**
+ * Describe a
+ *
+ * This holds the information to emit a SVGA3dCmdSurfaceDMA.
+ */
+struct svga_buffer_upload
+{
+ /**
+ * Guest memory region.
+ */
+ struct svga_winsys_buffer *buf;
+
+ struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES];
+ unsigned num_ranges;
+
+ SVGA3dSurfaceDMAFlags flags;
+
+ /**
+ * Pointer to the DMA copy box *inside* the command buffer.
+ */
+ SVGA3dCopyBox *boxes;
+
+ /**
+ * Context that has the pending DMA to this buffer.
+ */
+ struct svga_context *svga;
+};
+
+
+/**
+ * SVGA pipe buffer.
+ */
+struct svga_buffer
+{
+ struct pipe_buffer base;
+
+ /**
+ * Marker to detect bad casts in runtime.
+ */
+ uint32_t magic;
+
+ /**
+ * Regular (non DMA'able) memory.
+ *
+ * Used for user buffers or for buffers which we know before hand that can
+ * never be used by the virtual hardware directly, such as constant buffers.
+ */
+ void *swbuf;
+
+ /**
+ * Whether swbuf was created by the user or not.
+ */
+ boolean user;
+
+ /**
+ * DMA'ble memory.
+ *
+ * A piece of GMR memory. It is created when mapping the buffer, and will be
+ * used to upload/download vertex data from the host.
+ */
+ struct svga_buffer_upload hw;
+
+ /**
+ * Creation key for the host surface handle.
+ *
+ * This structure describes all the host surface characteristics so that it
+ * can be looked up in cache, since creating a host surface is often a slow
+ * operation.
+ */
+ struct svga_host_surface_cache_key key;
+
+ /**
+ * Host surface handle.
+ *
+ * This is a platform independent abstraction for host SID. We create when
+ * trying to bind
+ */
+ struct svga_winsys_surface *handle;
+
+ struct {
+ unsigned count;
+ boolean writing;
+ boolean flush_explicit;
+ } map;
+
+ boolean needs_flush;
+ struct list_head head;
+};
+
+
+static INLINE struct svga_buffer *
+svga_buffer(struct pipe_buffer *buffer)
+{
+ if (buffer) {
+ assert(((struct svga_buffer *)buffer)->magic == SVGA_BUFFER_MAGIC);
+ return (struct svga_buffer *)buffer;
+ }
+ return NULL;
+}
+
+
+/**
+ * Returns TRUE for user buffers. We may
+ * decide to use an alternate upload path for these buffers.
+ */
+static INLINE boolean
+svga_buffer_is_user_buffer( struct pipe_buffer *buffer )
+{
+ return svga_buffer(buffer)->user;
+}
+
+
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen);
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+ struct pipe_buffer *buf);
+
+void
+svga_context_flush_buffers(struct svga_context *svga);
+
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss);
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create(struct svga_screen *ss,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size);
+
+#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
new file mode 100644
index 00000000000..eff36e0bccb
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -0,0 +1,347 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_hash.h"
+
+#include "svga_debug.h"
+#include "svga_winsys.h"
+#include "svga_screen.h"
+#include "svga_screen_cache.h"
+
+
+#define SVGA_SURFACE_CACHE_ENABLED 1
+
+
+/**
+ * Compute the bucket for this key.
+ */
+static INLINE unsigned
+svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
+{
+ return util_hash_crc32( key, sizeof *key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
+}
+
+
+static INLINE struct svga_winsys_surface *
+svga_screen_cache_lookup(struct svga_screen *svgascreen,
+ const struct svga_host_surface_cache_key *key)
+{
+ struct svga_host_surface_cache *cache = &svgascreen->cache;
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_host_surface_cache_entry *entry;
+ struct svga_winsys_surface *handle = NULL;
+ struct list_head *curr, *next;
+ unsigned bucket;
+ unsigned tries = 0;
+
+ assert(key->cachable);
+
+ bucket = svga_screen_cache_bucket(key);
+
+ pipe_mutex_lock(cache->mutex);
+
+ curr = cache->bucket[bucket].next;
+ next = curr->next;
+ while(curr != &cache->bucket[bucket]) {
+ ++tries;
+
+ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head);
+
+ assert(entry->handle);
+
+ if(memcmp(&entry->key, key, sizeof *key) == 0 &&
+ sws->fence_signalled( sws, entry->fence, 0 ) == 0) {
+ assert(sws->surface_is_flushed(sws, entry->handle));
+
+ handle = entry->handle; // Reference is transfered here.
+ entry->handle = NULL;
+
+ LIST_DEL(&entry->bucket_head);
+
+ LIST_DEL(&entry->head);
+
+ LIST_ADD(&entry->head, &cache->empty);
+
+ break;
+ }
+
+ curr = next;
+ next = curr->next;
+ }
+
+ pipe_mutex_unlock(cache->mutex);
+
+ if (SVGA_DEBUG & DEBUG_DMA)
+ debug_printf("%s: cache %s after %u tries (bucket %d)\n", __FUNCTION__,
+ handle ? "hit" : "miss", tries, bucket);
+
+ return handle;
+}
+
+
+/*
+ * Transfers a handle reference.
+ */
+
+static INLINE void
+svga_screen_cache_add(struct svga_screen *svgascreen,
+ const struct svga_host_surface_cache_key *key,
+ struct svga_winsys_surface **p_handle)
+{
+ struct svga_host_surface_cache *cache = &svgascreen->cache;
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_host_surface_cache_entry *entry = NULL;
+ struct svga_winsys_surface *handle = *p_handle;
+
+ assert(key->cachable);
+
+ assert(handle);
+ if(!handle)
+ return;
+
+ *p_handle = NULL;
+ pipe_mutex_lock(cache->mutex);
+
+ if(!LIST_IS_EMPTY(&cache->empty)) {
+ /* use the first empty entry */
+ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head);
+
+ LIST_DEL(&entry->head);
+ }
+ else if(!LIST_IS_EMPTY(&cache->unused)) {
+ /* free the last used buffer and reuse its entry */
+ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head);
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "unref sid %p (make space)\n", entry->handle);
+ sws->surface_reference(sws, &entry->handle, NULL);
+
+ LIST_DEL(&entry->bucket_head);
+
+ LIST_DEL(&entry->head);
+ }
+
+ if(entry) {
+ entry->handle = handle;
+ memcpy(&entry->key, key, sizeof entry->key);
+
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "cache sid %p\n", entry->handle);
+ LIST_ADD(&entry->head, &cache->validated);
+ }
+ else {
+ /* Couldn't cache the buffer -- this really shouldn't happen */
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "unref sid %p (couldn't find space)\n", handle);
+ sws->surface_reference(sws, &handle, NULL);
+ }
+
+ pipe_mutex_unlock(cache->mutex);
+}
+
+
+/**
+ * Called during the screen flush to move all buffers not in a validate list
+ * into the unused list.
+ */
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+ struct pipe_fence_handle *fence)
+{
+ struct svga_host_surface_cache *cache = &svgascreen->cache;
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_host_surface_cache_entry *entry;
+ struct list_head *curr, *next;
+ unsigned bucket;
+
+ pipe_mutex_lock(cache->mutex);
+
+ curr = cache->validated.next;
+ next = curr->next;
+ while(curr != &cache->validated) {
+ entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head);
+
+ assert(entry->handle);
+
+ if(sws->surface_is_flushed(sws, entry->handle)) {
+ LIST_DEL(&entry->head);
+
+ svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
+
+ LIST_ADD(&entry->head, &cache->unused);
+
+ bucket = svga_screen_cache_bucket(&entry->key);
+ LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
+ }
+
+ curr = next;
+ next = curr->next;
+ }
+
+ pipe_mutex_unlock(cache->mutex);
+}
+
+
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen)
+{
+ struct svga_host_surface_cache *cache = &svgascreen->cache;
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ unsigned i;
+
+ for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) {
+ if(cache->entries[i].handle) {
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "unref sid %p (shutdown)\n", cache->entries[i].handle);
+ sws->surface_reference(sws, &cache->entries[i].handle, NULL);
+ }
+
+ if(cache->entries[i].fence)
+ svgascreen->sws->fence_reference(svgascreen->sws, &cache->entries[i].fence, NULL);
+ }
+
+ pipe_mutex_destroy(cache->mutex);
+}
+
+
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen)
+{
+ struct svga_host_surface_cache *cache = &svgascreen->cache;
+ unsigned i;
+
+ pipe_mutex_init(cache->mutex);
+
+ for(i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i)
+ LIST_INITHEAD(&cache->bucket[i]);
+
+ LIST_INITHEAD(&cache->unused);
+
+ LIST_INITHEAD(&cache->validated);
+
+ LIST_INITHEAD(&cache->empty);
+ for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i)
+ LIST_ADDTAIL(&cache->entries[i].head, &cache->empty);
+
+ return PIPE_OK;
+}
+
+
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+ struct svga_host_surface_cache_key *key)
+{
+ struct svga_winsys_screen *sws = svgascreen->sws;
+ struct svga_winsys_surface *handle = NULL;
+ boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
+
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "%s sz %dx%dx%d mips %d faces %d cachable %d\n",
+ __FUNCTION__,
+ key->size.width,
+ key->size.height,
+ key->size.depth,
+ key->numMipLevels,
+ key->numFaces,
+ key->cachable);
+
+ if (cachable) {
+ if (key->format == SVGA3D_BUFFER) {
+ /* For buffers, round the buffer size up to the nearest power
+ * of two to increase the probability of cache hits. Keep
+ * texture surface dimensions unchanged.
+ */
+ uint32_t size = 1;
+ while(size < key->size.width)
+ size <<= 1;
+ key->size.width = size;
+ /* Since we're reusing buffers we're effectively transforming all
+ * of them into dynamic buffers.
+ *
+ * It would be nice to not cache long lived static buffers. But there
+ * is no way to detect the long lived from short lived ones yet. A
+ * good heuristic would be buffer size.
+ */
+ key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
+ key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
+ }
+
+ handle = svga_screen_cache_lookup(svgascreen, key);
+ if (handle) {
+ if (key->format == SVGA3D_BUFFER)
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "reuse sid %p sz %d (buffer)\n", handle,
+ key->size.width);
+ else
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle,
+ key->size.width,
+ key->size.height,
+ key->size.depth,
+ key->numMipLevels,
+ key->numFaces);
+ }
+ }
+
+ if (!handle) {
+ handle = sws->surface_create(sws,
+ key->flags,
+ key->format,
+ key->size,
+ key->numFaces,
+ key->numMipLevels);
+ if (handle)
+ SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+ " CREATE sid %p sz %dx%dx%d\n",
+ handle,
+ key->size.width,
+ key->size.height,
+ key->size.depth);
+ }
+
+ return handle;
+}
+
+
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+ const struct svga_host_surface_cache_key *key,
+ struct svga_winsys_surface **p_handle)
+{
+ struct svga_winsys_screen *sws = svgascreen->sws;
+
+ /* We only set the cachable flag for surfaces of which we are the
+ * exclusive owner. So just hold onto our existing reference in
+ * that case.
+ */
+ if(SVGA_SURFACE_CACHE_ENABLED && key->cachable) {
+ svga_screen_cache_add(svgascreen, key, p_handle);
+ }
+ else {
+ SVGA_DBG(DEBUG_DMA,
+ "unref sid %p (uncachable)\n", *p_handle);
+ sws->surface_reference(sws, p_handle, NULL);
+ }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
new file mode 100644
index 00000000000..f5aa740d408
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -0,0 +1,144 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_CACHE_H_
+#define SVGA_SCREEN_CACHE_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+
+/* Guess the storage size of cached surfaces and try and keep it under
+ * this amount:
+ */
+#define SVGA_HOST_SURFACE_CACHE_BYTES 16*1024*1024
+
+/* Maximum number of discrete surfaces in the cache:
+ */
+#define SVGA_HOST_SURFACE_CACHE_SIZE 1024
+
+/* Number of hash buckets:
+ */
+#define SVGA_HOST_SURFACE_CACHE_BUCKETS 256
+
+
+struct svga_winsys_surface;
+struct svga_screen;
+
+/**
+ * Same as svga_winsys_screen::surface_create.
+ */
+struct svga_host_surface_cache_key
+{
+ SVGA3dSurfaceFlags flags;
+ SVGA3dSurfaceFormat format;
+ SVGA3dSize size;
+ uint32_t numFaces:24;
+ uint32_t numMipLevels:7;
+ uint32_t cachable:1; /* False if this is a shared surface */
+};
+
+
+struct svga_host_surface_cache_entry
+{
+ /**
+ * Head for the LRU list, svga_host_surface_cache::unused, and
+ * svga_host_surface_cache::empty
+ */
+ struct list_head head;
+
+ /** Head for the bucket lists. */
+ struct list_head bucket_head;
+
+ struct svga_host_surface_cache_key key;
+ struct svga_winsys_surface *handle;
+
+ struct pipe_fence_handle *fence;
+};
+
+
+/**
+ * Cache of the host surfaces.
+ *
+ * A cache entry can be in the following stages:
+ * 1. empty
+ * 2. holding a buffer in a validate list
+ * 3. holding a flushed buffer (not in any validate list) with an active fence
+ * 4. holding a flushed buffer with an expired fence
+ *
+ * An entry progresses from 1 -> 2 -> 3 -> 4. When we need an entry to put a
+ * buffer into we preferencial take from 1, or from the least recentely used
+ * buffer from 3/4.
+ */
+struct svga_host_surface_cache
+{
+ pipe_mutex mutex;
+
+ /* Unused buffers are put in buckets to speed up lookups */
+ struct list_head bucket[SVGA_HOST_SURFACE_CACHE_BUCKETS];
+
+ /* Entries with unused buffers, ordered from most to least recently used
+ * (3 and 4) */
+ struct list_head unused;
+
+ /* Entries with buffers still in validate lists (2) */
+ struct list_head validated;
+
+ /** Empty entries (1) */
+ struct list_head empty;
+
+ /** The actual storage for the entries */
+ struct svga_host_surface_cache_entry entries[SVGA_HOST_SURFACE_CACHE_SIZE];
+};
+
+
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen);
+
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+ struct pipe_fence_handle *fence);
+
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen);
+
+
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+ struct svga_host_surface_cache_key *key);
+
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+ const struct svga_host_surface_cache_key *key,
+ struct svga_winsys_surface **handle);
+
+
+#endif /* SVGA_SCREEN_CACHE_H_ */
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
new file mode 100644
index 00000000000..2224c2d3945
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -0,0 +1,1090 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+#include "svga_screen_buffer.h"
+
+#include <util/u_string.h>
+
+
+/* XXX: This isn't a real hardware flag, but just a hack for kernel to
+ * know about primary surfaces. Find a better way to accomplish this.
+ */
+#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
+
+
+/*
+ * Helper function and arrays
+ */
+
+SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format)
+{
+ switch(format) {
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return SVGA3D_A8R8G8B8;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return SVGA3D_X8R8G8B8;
+
+ /* Required for GL2.1:
+ */
+ case PIPE_FORMAT_A8R8G8B8_SRGB:
+ return SVGA3D_A8R8G8B8;
+
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ return SVGA3D_R5G6B5;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ return SVGA3D_A1R5G5B5;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ return SVGA3D_A4R4G4B4;
+
+
+ /* XXX: Doesn't seem to work properly.
+ case PIPE_FORMAT_Z32_UNORM:
+ return SVGA3D_Z_D32;
+ */
+ case PIPE_FORMAT_Z16_UNORM:
+ return SVGA3D_Z_D16;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return SVGA3D_Z_D24S8;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return SVGA3D_Z_D24X8;
+
+ case PIPE_FORMAT_A8_UNORM:
+ return SVGA3D_ALPHA8;
+ case PIPE_FORMAT_L8_UNORM:
+ return SVGA3D_LUMINANCE8;
+
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ return SVGA3D_DXT1;
+ case PIPE_FORMAT_DXT3_RGBA:
+ return SVGA3D_DXT3;
+ case PIPE_FORMAT_DXT5_RGBA:
+ return SVGA3D_DXT5;
+
+ default:
+ return SVGA3D_FORMAT_INVALID;
+ }
+}
+
+
+SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format)
+{
+ switch(format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z32_UNORM:
+ case PIPE_FORMAT_Z16_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ return svga_translate_format(format);
+
+#if 1
+ /* For on host conversion */
+ case PIPE_FORMAT_DXT1_RGB:
+ return SVGA3D_X8R8G8B8;
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return SVGA3D_A8R8G8B8;
+#endif
+
+ default:
+ return SVGA3D_FORMAT_INVALID;
+ }
+}
+
+
+static INLINE void
+svga_transfer_dma_band(struct svga_transfer *st,
+ SVGA3dTransferType transfer,
+ unsigned y, unsigned h, unsigned srcy)
+{
+ struct svga_texture *texture = svga_texture(st->base.texture);
+ struct svga_screen *screen = svga_screen(texture->base.screen);
+ SVGA3dCopyBox box;
+ enum pipe_error ret;
+
+ SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
+ transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from",
+ texture->handle,
+ st->base.face,
+ st->base.x,
+ y,
+ st->base.zslice,
+ st->base.x + st->base.width,
+ y + h,
+ st->base.zslice + 1,
+ util_format_get_blocksize(texture->base.format)*8/
+ (util_format_get_blockwidth(texture->base.format)*util_format_get_blockheight(texture->base.format)));
+
+ box.x = st->base.x;
+ box.y = y;
+ box.z = st->base.zslice;
+ box.w = st->base.width;
+ box.h = h;
+ box.d = 1;
+ box.srcx = 0;
+ box.srcy = srcy;
+ box.srcz = 0;
+
+ pipe_mutex_lock(screen->swc_mutex);
+ ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+ if(ret != PIPE_OK) {
+ screen->swc->flush(screen->swc, NULL);
+ ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+ assert(ret == PIPE_OK);
+ }
+ pipe_mutex_unlock(screen->swc_mutex);
+}
+
+
+static INLINE void
+svga_transfer_dma(struct svga_transfer *st,
+ SVGA3dTransferType transfer)
+{
+ struct svga_texture *texture = svga_texture(st->base.texture);
+ struct svga_screen *screen = svga_screen(texture->base.screen);
+ struct svga_winsys_screen *sws = screen->sws;
+ struct pipe_fence_handle *fence = NULL;
+
+ if (transfer == SVGA3D_READ_HOST_VRAM) {
+ SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
+ }
+
+
+ if(!st->swbuf) {
+ /* Do the DMA transfer in a single go */
+
+ svga_transfer_dma_band(st, transfer, st->base.y, st->base.height, 0);
+
+ if(transfer == SVGA3D_READ_HOST_VRAM) {
+ svga_screen_flush(screen, &fence);
+ sws->fence_finish(sws, fence, 0);
+ //sws->fence_reference(sws, &fence, NULL);
+ }
+ }
+ else {
+ unsigned y, h, srcy;
+ unsigned blockheight = util_format_get_blockheight(st->base.texture->format);
+ h = st->hw_nblocksy * blockheight;
+ srcy = 0;
+ for(y = 0; y < st->base.height; y += h) {
+ unsigned offset, length;
+ void *hw, *sw;
+
+ if (y + h > st->base.height)
+ h = st->base.height - y;
+
+ /* Transfer band must be aligned to pixel block boundaries */
+ assert(y % blockheight == 0);
+ assert(h % blockheight == 0);
+
+ offset = y * st->base.stride / blockheight;
+ length = h * st->base.stride / blockheight;
+
+ sw = (uint8_t *)st->swbuf + offset;
+
+ if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+ /* Wait for the previous DMAs to complete */
+ /* TODO: keep one DMA (at half the size) in the background */
+ if(y) {
+ svga_screen_flush(screen, &fence);
+ sws->fence_finish(sws, fence, 0);
+ //sws->fence_reference(sws, &fence, NULL);
+ }
+
+ hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE);
+ assert(hw);
+ if(hw) {
+ memcpy(hw, sw, length);
+ sws->buffer_unmap(sws, st->hwbuf);
+ }
+ }
+
+ svga_transfer_dma_band(st, transfer, y, h, srcy);
+
+ if(transfer == SVGA3D_READ_HOST_VRAM) {
+ svga_screen_flush(screen, &fence);
+ sws->fence_finish(sws, fence, 0);
+
+ hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ assert(hw);
+ if(hw) {
+ memcpy(sw, hw, length);
+ sws->buffer_unmap(sws, st->hwbuf);
+ }
+ }
+ }
+ }
+}
+
+
+static struct pipe_texture *
+svga_texture_create(struct pipe_screen *screen,
+ const struct pipe_texture *templat)
+{
+ struct svga_screen *svgascreen = svga_screen(screen);
+ struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+ unsigned width, height, depth;
+ unsigned level;
+
+ if (!tex)
+ goto error1;
+
+ tex->base = *templat;
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ assert(templat->last_level < SVGA_MAX_TEXTURE_LEVELS);
+ if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS)
+ goto error2;
+
+ width = templat->width0;
+ height = templat->height0;
+ depth = templat->depth0;
+ for(level = 0; level <= templat->last_level; ++level) {
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+ }
+
+ tex->key.flags = 0;
+ tex->key.size.width = templat->width0;
+ tex->key.size.height = templat->height0;
+ tex->key.size.depth = templat->depth0;
+
+ if(templat->target == PIPE_TEXTURE_CUBE) {
+ tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+ tex->key.numFaces = 6;
+ }
+ else {
+ tex->key.numFaces = 1;
+ }
+
+ if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+ tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+
+ if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+ tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+
+ /*
+ * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
+ * know beforehand whether a texture will be used as a rendertarget or not
+ * and it always requests PIPE_TEXTURE_USAGE_RENDER_TARGET, therefore
+ * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose.
+ */
+#if 0
+ if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) &&
+ !util_format_is_compressed(templat->format))
+ tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
+#endif
+
+ if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+ tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+
+ tex->key.numMipLevels = templat->last_level + 1;
+
+ tex->key.format = svga_translate_format(templat->format);
+ if(tex->key.format == SVGA3D_FORMAT_INVALID)
+ goto error2;
+
+ tex->key.cachable = 1;
+
+ SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
+ tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
+ if (tex->handle)
+ SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture)\n", tex->handle);
+
+ return &tex->base;
+
+error2:
+ FREE(tex);
+error1:
+ return NULL;
+}
+
+
+static struct pipe_texture *
+svga_texture_blanket(struct pipe_screen * screen,
+ const struct pipe_texture *base,
+ const unsigned *stride,
+ struct pipe_buffer *buffer)
+{
+ struct svga_texture *tex;
+ struct svga_buffer *sbuf = svga_buffer(buffer);
+ struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+ assert(screen);
+
+ /* Only supports one type */
+ if (base->target != PIPE_TEXTURE_2D ||
+ base->last_level != 0 ||
+ base->depth0 != 1) {
+ return NULL;
+ }
+
+ /**
+ * We currently can't do texture blanket on
+ * SVGA3D_BUFFER. Need to blit to a temporary surface?
+ */
+
+ assert(sbuf->handle);
+ if (!sbuf->handle)
+ return NULL;
+
+ if (svga_translate_format(base->format) != sbuf->key.format) {
+ unsigned f1 = svga_translate_format(base->format);
+ unsigned f2 = sbuf->key.format;
+
+ /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
+ if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+ (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+ (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) {
+ debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+ return NULL;
+ }
+ }
+
+ tex = CALLOC_STRUCT(svga_texture);
+ if (!tex)
+ return NULL;
+
+ tex->base = *base;
+
+
+ if (sbuf->key.format == 1)
+ tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+ else if (sbuf->key.format == 2)
+ tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+ pipe_reference_init(&tex->base.reference, 1);
+ tex->base.screen = screen;
+
+ SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle);
+
+ /* We don't own this storage, so don't try to cache it.
+ */
+ assert(sbuf->key.cachable == 0);
+ tex->key.cachable = 0;
+ sws->surface_reference(sws, &tex->handle, sbuf->handle);
+
+ return &tex->base;
+}
+
+
+static void
+svga_texture_destroy(struct pipe_texture *pt)
+{
+ struct svga_screen *ss = svga_screen(pt->screen);
+ struct svga_texture *tex = (struct svga_texture *)pt;
+
+ ss->texture_timestamp++;
+
+ svga_sampler_view_reference(&tex->cached_view, NULL);
+
+ /*
+ DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+ */
+ SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
+ svga_screen_surface_destroy(ss, &tex->key, &tex->handle);
+
+ FREE(tex);
+}
+
+
+static void
+svga_texture_copy_handle(struct svga_context *svga,
+ struct svga_screen *ss,
+ struct svga_winsys_surface *src_handle,
+ unsigned src_x, unsigned src_y, unsigned src_z,
+ unsigned src_level, unsigned src_face,
+ struct svga_winsys_surface *dst_handle,
+ unsigned dst_x, unsigned dst_y, unsigned dst_z,
+ unsigned dst_level, unsigned dst_face,
+ unsigned width, unsigned height, unsigned depth)
+{
+ struct svga_surface dst, src;
+ enum pipe_error ret;
+ SVGA3dCopyBox box, *boxes;
+
+ assert(svga || ss);
+
+ src.handle = src_handle;
+ src.real_level = src_level;
+ src.real_face = src_face;
+ src.real_zslice = 0;
+
+ dst.handle = dst_handle;
+ dst.real_level = dst_level;
+ dst.real_face = dst_face;
+ dst.real_zslice = 0;
+
+ box.x = dst_x;
+ box.y = dst_y;
+ box.z = dst_z;
+ box.w = width;
+ box.h = height;
+ box.d = depth;
+ box.srcx = src_x;
+ box.srcy = src_y;
+ box.srcz = src_z;
+
+/*
+ SVGA_DBG(DEBUG_VIEWS, "mipcopy src: %p %u (%ux%ux%u), dst: %p %u (%ux%ux%u)\n",
+ src_handle, src_level, src_x, src_y, src_z,
+ dst_handle, dst_level, dst_x, dst_y, dst_z);
+*/
+
+ if (svga) {
+ ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+ &src.base,
+ &dst.base,
+ &boxes, 1);
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+ &src.base,
+ &dst.base,
+ &boxes, 1);
+ assert(ret == PIPE_OK);
+ }
+ *boxes = box;
+ SVGA_FIFOCommitAll(svga->swc);
+ } else {
+ pipe_mutex_lock(ss->swc_mutex);
+ ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+ &src.base,
+ &dst.base,
+ &boxes, 1);
+ if(ret != PIPE_OK) {
+ ss->swc->flush(ss->swc, NULL);
+ ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+ &src.base,
+ &dst.base,
+ &boxes, 1);
+ assert(ret == PIPE_OK);
+ }
+ *boxes = box;
+ SVGA_FIFOCommitAll(ss->swc);
+ pipe_mutex_unlock(ss->swc_mutex);
+ }
+}
+
+static struct svga_winsys_surface *
+svga_texture_view_surface(struct pipe_context *pipe,
+ struct svga_texture *tex,
+ SVGA3dSurfaceFormat format,
+ unsigned start_mip,
+ unsigned num_mip,
+ int face_pick,
+ int zslice_pick,
+ struct svga_host_surface_cache_key *key) /* OUT */
+{
+ struct svga_screen *ss = svga_screen(tex->base.screen);
+ struct svga_winsys_surface *handle;
+ uint32_t i, j;
+ unsigned z_offset = 0;
+
+ SVGA_DBG(DEBUG_PERF,
+ "svga: Create surface view: face %d zslice %d mips %d..%d\n",
+ face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+
+ key->flags = 0;
+ key->format = format;
+ key->numMipLevels = num_mip;
+ key->size.width = u_minify(tex->base.width0, start_mip);
+ key->size.height = u_minify(tex->base.height0, start_mip);
+ key->size.depth = zslice_pick < 0 ? u_minify(tex->base.depth0, start_mip) : 1;
+ key->cachable = 1;
+ assert(key->size.depth == 1);
+
+ if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+ key->flags |= SVGA3D_SURFACE_CUBEMAP;
+ key->numFaces = 6;
+ } else {
+ key->numFaces = 1;
+ }
+
+ if(key->format == SVGA3D_FORMAT_INVALID) {
+ key->cachable = 0;
+ return NULL;
+ }
+
+ SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
+ handle = svga_screen_surface_create(ss, key);
+ if (!handle) {
+ key->cachable = 0;
+ return NULL;
+ }
+
+ SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
+
+ if (face_pick < 0)
+ face_pick = 0;
+
+ if (zslice_pick >= 0)
+ z_offset = zslice_pick;
+
+ for (i = 0; i < key->numMipLevels; i++) {
+ for (j = 0; j < key->numFaces; j++) {
+ if(tex->defined[j + face_pick][i + start_mip]) {
+ unsigned depth = (zslice_pick < 0 ?
+ u_minify(tex->base.depth0, i + start_mip) :
+ 1);
+
+ svga_texture_copy_handle(svga_context(pipe),
+ ss,
+ tex->handle,
+ 0, 0, z_offset,
+ i + start_mip,
+ j + face_pick,
+ handle, 0, 0, 0, i, j,
+ u_minify(tex->base.width0, i + start_mip),
+ u_minify(tex->base.height0, i + start_mip),
+ depth);
+ }
+ }
+ }
+
+ return handle;
+}
+
+
+static struct pipe_surface *
+svga_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_texture *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned flags)
+{
+ struct svga_texture *tex = svga_texture(pt);
+ struct svga_surface *s;
+ boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE;
+ boolean view = FALSE;
+ SVGA3dSurfaceFormat format;
+
+ s = CALLOC_STRUCT(svga_surface);
+ if (!s)
+ return NULL;
+
+ pipe_reference_init(&s->base.reference, 1);
+ pipe_texture_reference(&s->base.texture, pt);
+ s->base.format = pt->format;
+ s->base.width = u_minify(pt->width0, level);
+ s->base.height = u_minify(pt->height0, level);
+ s->base.usage = flags;
+ s->base.level = level;
+ s->base.face = face;
+ s->base.zslice = zslice;
+
+ if (!render)
+ format = svga_translate_format(pt->format);
+ else
+ format = svga_translate_format_render(pt->format);
+
+ assert(format != SVGA3D_FORMAT_INVALID);
+ assert(!(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
+
+
+ if (svga_screen(screen)->debug.force_surface_view)
+ view = TRUE;
+
+ /* Currently only used for compressed textures */
+ if (render &&
+ format != svga_translate_format(pt->format)) {
+ view = TRUE;
+ }
+
+ if (level != 0 &&
+ svga_screen(screen)->debug.force_level_surface_view)
+ view = TRUE;
+
+ if (pt->target == PIPE_TEXTURE_3D)
+ view = TRUE;
+
+ if (svga_screen(screen)->debug.no_surface_view)
+ view = FALSE;
+
+ if (view) {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
+ pt, level, face, zslice, s);
+
+ s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice,
+ &s->key);
+ s->real_face = 0;
+ s->real_level = 0;
+ s->real_zslice = 0;
+ } else {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
+ pt, level, face, zslice, s);
+
+ memset(&s->key, 0, sizeof s->key);
+ s->handle = tex->handle;
+ s->real_face = face;
+ s->real_level = level;
+ s->real_zslice = zslice;
+ }
+
+ return &s->base;
+}
+
+
+static void
+svga_tex_surface_destroy(struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+ struct svga_texture *t = svga_texture(surf->texture);
+ struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+ if(s->handle != t->handle) {
+ SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
+ svga_screen_surface_destroy(ss, &s->key, &s->handle);
+ }
+
+ pipe_texture_reference(&surf->texture, NULL);
+ FREE(surf);
+}
+
+
+static INLINE void
+svga_mark_surface_dirty(struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+
+ if(!s->dirty) {
+ struct svga_texture *tex = svga_texture(surf->texture);
+
+ s->dirty = TRUE;
+
+ if (s->handle == tex->handle)
+ tex->defined[surf->face][surf->level] = TRUE;
+ else {
+ /* this will happen later in svga_propagate_surface */
+ }
+ }
+}
+
+
+void svga_mark_surfaces_dirty(struct svga_context *svga)
+{
+ unsigned i;
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (svga->curr.framebuffer.cbufs[i])
+ svga_mark_surface_dirty(svga->curr.framebuffer.cbufs[i]);
+ }
+ if (svga->curr.framebuffer.zsbuf)
+ svga_mark_surface_dirty(svga->curr.framebuffer.zsbuf);
+}
+
+/**
+ * Progagate any changes from surfaces to texture.
+ * pipe is optional context to inline the blit command in.
+ */
+void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+ struct svga_texture *tex = svga_texture(surf->texture);
+ struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+ if (!s->dirty)
+ return;
+
+ s->dirty = FALSE;
+ ss->texture_timestamp++;
+ tex->view_age[surf->level] = ++(tex->age);
+
+ if (s->handle != tex->handle) {
+ SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf);
+ svga_texture_copy_handle(svga_context(pipe), ss,
+ s->handle, 0, 0, 0, s->real_level, s->real_face,
+ tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
+ u_minify(tex->base.width0, surf->level),
+ u_minify(tex->base.height0, surf->level), 1);
+ tex->defined[surf->face][surf->level] = TRUE;
+ }
+}
+
+/**
+ * Check if we should call svga_propagate_surface on the surface.
+ */
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf)
+{
+ struct svga_surface *s = svga_surface(surf);
+ struct svga_texture *tex = svga_texture(surf->texture);
+
+ return s->dirty && s->handle != tex->handle;
+}
+
+
+static struct pipe_transfer *
+svga_get_tex_transfer(struct pipe_screen *screen,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level, unsigned zslice,
+ enum pipe_transfer_usage usage, unsigned x, unsigned y,
+ unsigned w, unsigned h)
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_transfer *st;
+ unsigned nblocksx = util_format_get_nblocksx(texture->format, w);
+ unsigned nblocksy = util_format_get_nblocksy(texture->format, h);
+
+ /* We can't map texture storage directly */
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+ return NULL;
+
+ st = CALLOC_STRUCT(svga_transfer);
+ if (!st)
+ return NULL;
+
+ st->base.x = x;
+ st->base.y = y;
+ st->base.width = w;
+ st->base.height = h;
+ st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
+ st->base.usage = usage;
+ st->base.face = face;
+ st->base.level = level;
+ st->base.zslice = zslice;
+
+ st->hw_nblocksy = nblocksy;
+
+ st->hwbuf = svga_winsys_buffer_create(ss,
+ 1,
+ 0,
+ st->hw_nblocksy*st->base.stride);
+ while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
+ st->hwbuf = svga_winsys_buffer_create(ss,
+ 1,
+ 0,
+ st->hw_nblocksy*st->base.stride);
+ }
+
+ if(!st->hwbuf)
+ goto no_hwbuf;
+
+ if(st->hw_nblocksy < nblocksy) {
+ /* We couldn't allocate a hardware buffer big enough for the transfer,
+ * so allocate regular malloc memory instead */
+ debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
+ __FUNCTION__,
+ (nblocksy*st->base.stride + 1023)/1024,
+ (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+ (st->hw_nblocksy*st->base.stride + 1023)/1024);
+ st->swbuf = MALLOC(nblocksy*st->base.stride);
+ if(!st->swbuf)
+ goto no_swbuf;
+ }
+
+ pipe_texture_reference(&st->base.texture, texture);
+
+ if (usage & PIPE_TRANSFER_READ)
+ svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM);
+
+ return &st->base;
+
+no_swbuf:
+ sws->buffer_destroy(sws, st->hwbuf);
+no_hwbuf:
+ FREE(st);
+ return NULL;
+}
+
+
+static void *
+svga_transfer_map( struct pipe_screen *screen,
+ struct pipe_transfer *transfer )
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_transfer *st = svga_transfer(transfer);
+
+ if(st->swbuf)
+ return st->swbuf;
+ else
+ /* The wait for read transfers already happened when svga_transfer_dma
+ * was called. */
+ return sws->buffer_map(sws, st->hwbuf,
+ pipe_transfer_buffer_flags(transfer));
+}
+
+
+static void
+svga_transfer_unmap(struct pipe_screen *screen,
+ struct pipe_transfer *transfer)
+{
+ struct svga_screen *ss = svga_screen(screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_transfer *st = svga_transfer(transfer);
+
+ if(!st->swbuf)
+ sws->buffer_unmap(sws, st->hwbuf);
+}
+
+
+static void
+svga_tex_transfer_destroy(struct pipe_transfer *transfer)
+{
+ struct svga_texture *tex = svga_texture(transfer->texture);
+ struct svga_screen *ss = svga_screen(transfer->texture->screen);
+ struct svga_winsys_screen *sws = ss->sws;
+ struct svga_transfer *st = svga_transfer(transfer);
+
+ if (st->base.usage & PIPE_TRANSFER_WRITE) {
+ svga_transfer_dma(st, SVGA3D_WRITE_HOST_VRAM);
+ ss->texture_timestamp++;
+ tex->view_age[transfer->level] = ++(tex->age);
+ tex->defined[transfer->face][transfer->level] = TRUE;
+ }
+
+ pipe_texture_reference(&st->base.texture, NULL);
+ FREE(st->swbuf);
+ sws->buffer_destroy(sws, st->hwbuf);
+ FREE(st);
+}
+
+void
+svga_screen_init_texture_functions(struct pipe_screen *screen)
+{
+ screen->texture_create = svga_texture_create;
+ screen->texture_destroy = svga_texture_destroy;
+ screen->get_tex_surface = svga_get_tex_surface;
+ screen->tex_surface_destroy = svga_tex_surface_destroy;
+ screen->texture_blanket = svga_texture_blanket;
+ screen->get_tex_transfer = svga_get_tex_transfer;
+ screen->transfer_map = svga_transfer_map;
+ screen->transfer_unmap = svga_transfer_unmap;
+ screen->tex_transfer_destroy = svga_tex_transfer_destroy;
+}
+
+/***********************************************************************
+ */
+
+struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
+ unsigned min_lod, unsigned max_lod)
+{
+ struct svga_screen *ss = svga_screen(pt->screen);
+ struct svga_texture *tex = svga_texture(pt);
+ struct svga_sampler_view *sv = NULL;
+ SVGA3dSurfaceFormat format = svga_translate_format(pt->format);
+ boolean view = TRUE;
+
+ assert(pt);
+ assert(min_lod >= 0);
+ assert(min_lod <= max_lod);
+ assert(max_lod <= pt->last_level);
+
+
+ /* Is a view needed */
+ {
+ /*
+ * Can't control max lod. For first level views and when we only
+ * look at one level we disable mip filtering to achive the same
+ * results as a view.
+ */
+ if (min_lod == 0 && max_lod >= pt->last_level)
+ view = FALSE;
+
+ if (util_format_is_compressed(pt->format) && view) {
+ format = svga_translate_format_render(pt->format);
+ }
+
+ if (ss->debug.no_sampler_view)
+ view = FALSE;
+
+ if (ss->debug.force_sampler_view)
+ view = TRUE;
+ }
+
+ /* First try the cache */
+ if (view) {
+ pipe_mutex_lock(ss->tex_mutex);
+ if (tex->cached_view &&
+ tex->cached_view->min_lod == min_lod &&
+ tex->cached_view->max_lod == max_lod) {
+ svga_sampler_view_reference(&sv, tex->cached_view);
+ pipe_mutex_unlock(ss->tex_mutex);
+ SVGA_DBG(DEBUG_VIEWS, "svga: Sampler view: reuse %p, %u %u, last %u\n",
+ pt, min_lod, max_lod, pt->last_level);
+ svga_validate_sampler_view(svga_context(pipe), sv);
+ return sv;
+ }
+ pipe_mutex_unlock(ss->tex_mutex);
+ }
+
+ sv = CALLOC_STRUCT(svga_sampler_view);
+ pipe_reference_init(&sv->reference, 1);
+ pipe_texture_reference(&sv->texture, pt);
+ sv->min_lod = min_lod;
+ sv->max_lod = max_lod;
+
+ /* No view needed just use the whole texture */
+ if (!view) {
+ SVGA_DBG(DEBUG_VIEWS,
+ "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+ pt, min_lod, max_lod,
+ max_lod - min_lod + 1,
+ pt->width0,
+ pt->height0,
+ pt->depth0,
+ pt->last_level);
+ sv->key.cachable = 0;
+ sv->handle = tex->handle;
+ return sv;
+ }
+
+ SVGA_DBG(DEBUG_VIEWS,
+ "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+ pt, min_lod, max_lod,
+ max_lod - min_lod + 1,
+ pt->width0,
+ pt->height0,
+ pt->depth0,
+ pt->last_level);
+
+ sv->age = tex->age;
+ sv->handle = svga_texture_view_surface(pipe, tex, format,
+ min_lod,
+ max_lod - min_lod + 1,
+ -1, -1,
+ &sv->key);
+
+ if (!sv->handle) {
+ assert(0);
+ sv->key.cachable = 0;
+ sv->handle = tex->handle;
+ return sv;
+ }
+
+ pipe_mutex_lock(ss->tex_mutex);
+ svga_sampler_view_reference(&tex->cached_view, sv);
+ pipe_mutex_unlock(ss->tex_mutex);
+
+ return sv;
+}
+
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
+{
+ struct svga_texture *tex = svga_texture(v->texture);
+ unsigned numFaces;
+ unsigned age = 0;
+ int i, k;
+
+ assert(svga);
+
+ if (v->handle == tex->handle)
+ return;
+
+ age = tex->age;
+
+ if(tex->base.target == PIPE_TEXTURE_CUBE)
+ numFaces = 6;
+ else
+ numFaces = 1;
+
+ for (i = v->min_lod; i <= v->max_lod; i++) {
+ for (k = 0; k < numFaces; k++) {
+ if (v->age < tex->view_age[i])
+ svga_texture_copy_handle(svga, NULL,
+ tex->handle, 0, 0, 0, i, k,
+ v->handle, 0, 0, 0, i - v->min_lod, k,
+ u_minify(tex->base.width0, i),
+ u_minify(tex->base.height0, i),
+ u_minify(tex->base.depth0, i));
+ }
+ }
+
+ v->age = age;
+}
+
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
+{
+ struct svga_texture *tex = svga_texture(v->texture);
+
+ if(v->handle != tex->handle) {
+ struct svga_screen *ss = svga_screen(v->texture->screen);
+ SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
+ svga_screen_surface_destroy(ss, &v->key, &v->handle);
+ }
+ pipe_texture_reference(&v->texture, NULL);
+ FREE(v);
+}
+
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+ struct pipe_buffer **buffer,
+ unsigned *stride)
+{
+ struct svga_texture *stex = svga_texture(texture);
+
+ *buffer = svga_screen_buffer_wrap_surface
+ (texture->screen,
+ svga_translate_format(texture->format),
+ stex->handle);
+
+ *stride = util_format_get_stride(texture->format, texture->width0);
+
+ return *buffer != NULL;
+}
+
+
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture)
+{
+ struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
+ struct svga_winsys_surface *vsurf = NULL;
+
+ assert(svga_texture(texture)->key.cachable == 0);
+ svga_texture(texture)->key.cachable = 0;
+ sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle);
+ return vsurf;
+}
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
new file mode 100644
index 00000000000..89ae24219fd
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -0,0 +1,195 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TEXTURE_H
+#define SVGA_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "svga_screen_cache.h"
+
+struct pipe_context;
+struct pipe_screen;
+struct svga_context;
+struct svga_winsys_surface;
+enum SVGA3dSurfaceFormat;
+
+
+#define SVGA_MAX_TEXTURE_LEVELS 12 /* 2048x2048 */
+
+
+/**
+ * A sampler's view into a texture
+ *
+ * We currently cache one sampler view on
+ * the texture and in there by holding a reference
+ * from the texture to the sampler view.
+ *
+ * Because of this we can not hold a refernce to the
+ * texture from the sampler view. So the user
+ * of the sampler views must make sure that the
+ * texture has a reference take for as long as
+ * the sampler view is refrenced.
+ *
+ * Just unreferencing the sampler_view before the
+ * texture is enough.
+ */
+struct svga_sampler_view
+{
+ struct pipe_reference reference;
+
+ struct pipe_texture *texture;
+
+ int min_lod;
+ int max_lod;
+
+ unsigned age;
+
+ struct svga_host_surface_cache_key key;
+ struct svga_winsys_surface *handle;
+};
+
+
+struct svga_texture
+{
+ struct pipe_texture base;
+
+ boolean defined[6][PIPE_MAX_TEXTURE_LEVELS];
+
+ struct svga_sampler_view *cached_view;
+
+ unsigned view_age[SVGA_MAX_TEXTURE_LEVELS];
+ unsigned age;
+
+ boolean views_modified;
+
+ /**
+ * Creation key for the host surface handle.
+ *
+ * This structure describes all the host surface characteristics so that it
+ * can be looked up in cache, since creating a host surface is often a slow
+ * operation.
+ */
+ struct svga_host_surface_cache_key key;
+
+ /**
+ * Handle for the host side surface.
+ *
+ * This handle is owned by this texture. Views should hold on to a reference
+ * to this texture and never destroy this handle directly.
+ */
+ struct svga_winsys_surface *handle;
+};
+
+
+struct svga_surface
+{
+ struct pipe_surface base;
+
+ struct svga_host_surface_cache_key key;
+ struct svga_winsys_surface *handle;
+
+ unsigned real_face;
+ unsigned real_level;
+ unsigned real_zslice;
+
+ boolean dirty;
+};
+
+
+struct svga_transfer
+{
+ struct pipe_transfer base;
+
+ struct svga_winsys_buffer *hwbuf;
+
+ /* Height of the hardware buffer in pixel blocks */
+ unsigned hw_nblocksy;
+
+ /* Temporary malloc buffer when we can't allocate a hardware buffer
+ * big enough */
+ void *swbuf;
+};
+
+
+static INLINE struct svga_texture *
+svga_texture(struct pipe_texture *texture)
+{
+ return (struct svga_texture *)texture;
+}
+
+static INLINE struct svga_surface *
+svga_surface(struct pipe_surface *surface)
+{
+ assert(surface);
+ return (struct svga_surface *)surface;
+}
+
+static INLINE struct svga_transfer *
+svga_transfer(struct pipe_transfer *transfer)
+{
+ assert(transfer);
+ return (struct svga_transfer *)transfer;
+}
+
+extern struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe,
+ struct pipe_texture *pt,
+ unsigned min_lod, unsigned max_lod);
+
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v);
+
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
+
+static INLINE void
+svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
+{
+ struct svga_sampler_view *old = *ptr;
+
+ if (pipe_reference(&(*ptr)->reference, &v->reference))
+ svga_destroy_sampler_view_priv(old);
+ *ptr = v;
+}
+
+extern void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf);
+
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf);
+
+extern void
+svga_screen_init_texture_functions(struct pipe_screen *screen);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format);
+
+
+#endif /* SVGA_TEXTURE_H */
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
new file mode 100644
index 00000000000..1c21d3acfe3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -0,0 +1,278 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_cmd.h"
+#include "svga_hw_reg.h"
+
+/* This is just enough to decide whether we need to use the draw
+ * module (swtnl) or not.
+ */
+static const struct svga_tracked_state *need_swtnl_state[] =
+{
+ &svga_update_need_swvfetch,
+ &svga_update_need_pipeline,
+ &svga_update_need_swtnl,
+ NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a clear or draw
+ * packet.
+ */
+static const struct svga_tracked_state *hw_clear_state[] =
+{
+ &svga_hw_scissor,
+ &svga_hw_viewport,
+ &svga_hw_framebuffer,
+ NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a draw packet.
+ */
+static const struct svga_tracked_state *hw_draw_state[] =
+{
+ &svga_hw_update_zero_stride,
+ &svga_hw_fs,
+ &svga_hw_vs,
+ &svga_hw_rss,
+ &svga_hw_tss,
+ &svga_hw_tss_binding,
+ &svga_hw_clip_planes,
+ &svga_hw_vdecl,
+ &svga_hw_fs_parameters,
+ &svga_hw_vs_parameters,
+ NULL
+};
+
+
+static const struct svga_tracked_state *swtnl_draw_state[] =
+{
+ &svga_update_swtnl_draw,
+ &svga_update_swtnl_vdecl,
+ NULL
+};
+
+/* Flattens the graph of state dependencies. Could swap the positions
+ * of hw_clear_state and need_swtnl_state without breaking anything.
+ */
+static const struct svga_tracked_state **state_levels[] =
+{
+ need_swtnl_state,
+ hw_clear_state,
+ hw_draw_state,
+ swtnl_draw_state
+};
+
+
+
+static unsigned check_state( unsigned a,
+ unsigned b )
+{
+ return (a & b);
+}
+
+static void accumulate_state( unsigned *a,
+ unsigned b )
+{
+ *a |= b;
+}
+
+
+static void xor_states( unsigned *result,
+ unsigned a,
+ unsigned b )
+{
+ *result = a ^ b;
+}
+
+
+
+static int update_state( struct svga_context *svga,
+ const struct svga_tracked_state *atoms[],
+ unsigned *state )
+{
+ boolean debug = TRUE;
+ enum pipe_error ret = 0;
+ unsigned i;
+
+ ret = svga_hwtnl_flush( svga->hwtnl );
+ if (ret != 0)
+ return ret;
+
+ if (debug) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ unsigned examined, prev;
+
+ examined = 0;
+ prev = *state;
+
+ for (i = 0; atoms[i] != NULL; i++) {
+ unsigned generated;
+
+ assert(atoms[i]->dirty);
+ assert(atoms[i]->update);
+
+ if (check_state(*state, atoms[i]->dirty)) {
+ if (0)
+ debug_printf("update: %s\n", atoms[i]->name);
+ ret = atoms[i]->update( svga, *state );
+ if (ret != 0)
+ return ret;
+ }
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, prev, *state);
+ if (check_state(examined, generated)) {
+ debug_printf("state atom %s generated state already examined\n",
+ atoms[i]->name);
+ assert(0);
+ }
+
+ prev = *state;
+ accumulate_state(&examined, atoms[i]->dirty);
+ }
+ }
+ else {
+ for (i = 0; atoms[i] != NULL; i++) {
+ if (check_state(*state, atoms[i]->dirty)) {
+ ret = atoms[i]->update( svga, *state );
+ if (ret != 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+int svga_update_state( struct svga_context *svga,
+ unsigned max_level )
+{
+ struct svga_screen *screen = svga_screen(svga->pipe.screen);
+ int ret = 0;
+ int i;
+
+ /* Check for updates to bound textures. This can't be done in an
+ * atom as there is no flag which could provoke this test, and we
+ * cannot create one.
+ */
+ if (svga->state.texture_timestamp != screen->texture_timestamp) {
+ svga->state.texture_timestamp = screen->texture_timestamp;
+ svga->dirty |= SVGA_NEW_TEXTURE;
+ }
+
+ for (i = 0; i <= max_level; i++) {
+ svga->dirty |= svga->state.dirty[i];
+
+ if (svga->dirty) {
+ ret = update_state( svga,
+ state_levels[i],
+ &svga->dirty );
+ if (ret != 0)
+ return ret;
+
+ svga->state.dirty[i] = 0;
+ }
+ }
+
+ for (; i < SVGA_STATE_MAX; i++)
+ svga->state.dirty[i] |= svga->dirty;
+
+ svga->dirty = 0;
+ return 0;
+}
+
+
+
+
+void svga_update_state_retry( struct svga_context *svga,
+ unsigned max_level )
+{
+ int ret;
+
+ ret = svga_update_state( svga, max_level );
+
+ if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+ svga_context_flush(svga, NULL);
+ ret = svga_update_state( svga, max_level );
+ }
+
+ assert( ret == 0 );
+}
+
+
+
+#define EMIT_RS(_rs, _count, _name, _value) \
+do { \
+ _rs[_count].state = _name; \
+ _rs[_count].uintValue = _value; \
+ _count++; \
+} while (0)
+
+
+/* Setup any hardware state which will be constant through the life of
+ * a context.
+ */
+enum pipe_error svga_emit_initial_state( struct svga_context *svga )
+{
+ SVGA3dRenderState *rs;
+ unsigned count = 0;
+ const unsigned COUNT = 2;
+ enum pipe_error ret;
+
+ ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+ if (ret)
+ return ret;
+
+ /* Always use D3D style coordinate space as this is the only one
+ * which is implemented on all backends.
+ */
+ EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
+ EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+ assert( COUNT == count );
+ SVGA_FIFOCommitAll( svga->swc );
+
+ return 0;
+
+}
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
new file mode 100644
index 00000000000..22d5a6d552a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STATE_H
+#define SVGA_STATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+struct svga_context;
+
+
+void svga_init_state( struct svga_context *svga );
+void svga_destroy_state( struct svga_context *svga );
+
+
+struct svga_tracked_state {
+ const char *name;
+ unsigned dirty;
+ int (*update)( struct svga_context *svga, unsigned dirty );
+};
+
+/* NEED_SWTNL
+ */
+extern struct svga_tracked_state svga_update_need_swvfetch;
+extern struct svga_tracked_state svga_update_need_pipeline;
+extern struct svga_tracked_state svga_update_need_swtnl;
+
+/* HW_CLEAR
+ */
+extern struct svga_tracked_state svga_hw_viewport;
+extern struct svga_tracked_state svga_hw_scissor;
+extern struct svga_tracked_state svga_hw_framebuffer;
+
+/* HW_DRAW
+ */
+extern struct svga_tracked_state svga_hw_vs;
+extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_tss;
+extern struct svga_tracked_state svga_hw_tss_binding;
+extern struct svga_tracked_state svga_hw_clip_planes;
+extern struct svga_tracked_state svga_hw_vdecl;
+extern struct svga_tracked_state svga_hw_fs_parameters;
+extern struct svga_tracked_state svga_hw_vs_parameters;
+extern struct svga_tracked_state svga_hw_update_zero_stride;
+
+/* SWTNL_DRAW
+ */
+extern struct svga_tracked_state svga_update_swtnl_draw;
+extern struct svga_tracked_state svga_update_swtnl_vdecl;
+
+/* Bring the hardware fully up-to-date so that we can emit draw
+ * commands.
+ */
+#define SVGA_STATE_NEED_SWTNL 0
+#define SVGA_STATE_HW_CLEAR 1
+#define SVGA_STATE_HW_DRAW 2
+#define SVGA_STATE_SWTNL_DRAW 3
+#define SVGA_STATE_MAX 4
+
+
+enum pipe_error svga_update_state( struct svga_context *svga,
+ unsigned level );
+
+void svga_update_state_retry( struct svga_context *svga,
+ unsigned level );
+
+
+enum pipe_error svga_emit_initial_state( struct svga_context *svga );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
new file mode 100644
index 00000000000..6b0e511cec1
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -0,0 +1,240 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ * Hardware update
+ */
+
+/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ */
+static int svga_shader_type( int unit )
+{
+ return unit + 1;
+}
+
+
+static int emit_const( struct svga_context *svga,
+ int unit,
+ int i,
+ const float *value )
+{
+ int ret = PIPE_OK;
+
+ if (memcmp(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)) != 0) {
+ if (SVGA_DEBUG & DEBUG_CONSTS)
+ debug_printf("%s %s %d: %f %f %f %f\n",
+ __FUNCTION__,
+ unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG",
+ i,
+ value[0],
+ value[1],
+ value[2],
+ value[3]);
+
+ ret = SVGA3D_SetShaderConst( svga->swc,
+ i,
+ svga_shader_type(unit),
+ SVGA3D_CONST_TYPE_FLOAT,
+ value );
+ if (ret)
+ return ret;
+
+ memcpy(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float));
+ }
+
+ return ret;
+}
+
+static int emit_consts( struct svga_context *svga,
+ int offset,
+ int unit )
+{
+ struct pipe_screen *screen = svga->pipe.screen;
+ unsigned count;
+ const float (*data)[4] = NULL;
+ unsigned i;
+ int ret = PIPE_OK;
+
+ if (svga->curr.cb[unit] == NULL)
+ goto done;
+
+ count = svga->curr.cb[unit]->size / (4 * sizeof(float));
+
+ data = (const float (*)[4])pipe_buffer_map(screen,
+ svga->curr.cb[unit],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ if (data == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto done;
+ }
+
+ for (i = 0; i < count; i++) {
+ ret = emit_const( svga, unit, offset + i, data[i] );
+ if (ret)
+ goto done;
+ }
+
+done:
+ if (data)
+ pipe_buffer_unmap(screen, svga->curr.cb[unit]);
+
+ return ret;
+}
+
+static int emit_fs_consts( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct svga_shader_result *result = svga->state.hw_draw.fs;
+ const struct svga_fs_compile_key *key = &result->key.fkey;
+ int ret = 0;
+
+ ret = emit_consts( svga, 0, PIPE_SHADER_FRAGMENT );
+ if (ret)
+ return ret;
+
+ /* The internally generated fragment shader for xor blending
+ * doesn't have a 'result' struct. It should be fixed to avoid
+ * this special case, but work around it with a NULL check:
+ */
+ if (result != NULL &&
+ key->num_unnormalized_coords)
+ {
+ unsigned offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ int i;
+
+ for (i = 0; i < key->num_textures; i++) {
+ if (key->tex[i].unnormalized) {
+ struct pipe_texture *tex = svga->curr.texture[i];
+ float data[4];
+
+ data[0] = 1.0 / (float)tex->width0;
+ data[1] = 1.0 / (float)tex->height0;
+ data[2] = 1.0;
+ data[3] = 1.0;
+
+ ret = emit_const( svga,
+ PIPE_SHADER_FRAGMENT,
+ key->tex[i].width_height_idx + offset,
+ data );
+ if (ret)
+ return ret;
+ }
+ }
+
+ offset += key->num_unnormalized_coords;
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_hw_fs_parameters =
+{
+ "hw fs params",
+ (SVGA_NEW_FS_CONST_BUFFER |
+ SVGA_NEW_FS_RESULT |
+ SVGA_NEW_TEXTURE_BINDING),
+ emit_fs_consts
+};
+
+/***********************************************************************
+ */
+
+static int emit_vs_consts( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct svga_shader_result *result = svga->state.hw_draw.vs;
+ const struct svga_vs_compile_key *key = &result->key.vkey;
+ int ret = 0;
+ unsigned offset;
+
+ /* SVGA_NEW_VS_RESULT
+ */
+ if (result == NULL)
+ return 0;
+
+ /* SVGA_NEW_VS_CONST_BUFFER
+ */
+ ret = emit_consts( svga, 0, PIPE_SHADER_VERTEX );
+ if (ret)
+ return ret;
+
+ offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ /* SVGA_NEW_VS_RESULT
+ */
+ if (key->need_prescale) {
+ ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+ svga->state.hw_clear.prescale.scale );
+ if (ret)
+ return ret;
+
+ ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+ svga->state.hw_clear.prescale.translate );
+ if (ret)
+ return ret;
+ }
+
+ /* SVGA_NEW_ZERO_STRIDE
+ */
+ if (key->zero_stride_vertex_elements) {
+ unsigned i, curr_zero_stride = 0;
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
+ if (key->zero_stride_vertex_elements & (1 << i)) {
+ ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+ svga->curr.zero_stride_constants +
+ 4 * curr_zero_stride );
+ if (ret)
+ return ret;
+ ++curr_zero_stride;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_hw_vs_parameters =
+{
+ "hw vs params",
+ (SVGA_NEW_PRESCALE |
+ SVGA_NEW_VS_CONST_BUFFER |
+ SVGA_NEW_ZERO_STRIDE |
+ SVGA_NEW_VS_RESULT),
+ emit_vs_consts
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
new file mode 100644
index 00000000000..cfdcae4ee4a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -0,0 +1,458 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
+
+static int emit_framebuffer( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+ struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+ unsigned i;
+ enum pipe_error ret;
+
+ /* XXX: Need shadow state in svga->hw to eliminate redundant
+ * uploads, especially of NULL buffers.
+ */
+
+ for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+ if (curr->cbufs[i] != hw->cbufs[i]) {
+ if (svga->curr.nr_fbs++ > 8)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]);
+ if (ret != PIPE_OK)
+ return ret;
+
+ pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+ }
+ }
+
+
+ if (curr->zsbuf != hw->zsbuf) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
+ if (ret != PIPE_OK)
+ return ret;
+
+ if (curr->zsbuf &&
+ curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ else {
+ ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+ }
+
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_hw_framebuffer =
+{
+ "hw framebuffer state",
+ SVGA_NEW_FRAME_BUFFER,
+ emit_framebuffer
+};
+
+
+
+
+/***********************************************************************
+ */
+
+static int emit_viewport( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct pipe_viewport_state *viewport = &svga->curr.viewport;
+ struct svga_prescale prescale;
+ SVGA3dRect rect;
+ /* Not sure if this state is relevant with POSITIONT. Probably
+ * not, but setting to 0,1 avoids some state pingponging.
+ */
+ float range_min = 0.0;
+ float range_max = 1.0;
+ float flip = -1.0;
+ boolean degenerate = FALSE;
+ enum pipe_error ret;
+
+ float fb_width = svga->curr.framebuffer.width;
+ float fb_height = svga->curr.framebuffer.height;
+
+ memset( &prescale, 0, sizeof(prescale) );
+
+ if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) {
+
+ /* Avoid POSITIONT as it has a non trivial implementation outside the D3D
+ * API. Always generate a vertex shader.
+ */
+ rect.x = 0;
+ rect.y = 0;
+ rect.w = svga->curr.framebuffer.width;
+ rect.h = svga->curr.framebuffer.height;
+
+ prescale.scale[0] = 2.0 / (float)rect.w;
+ prescale.scale[1] = - 2.0 / (float)rect.h;
+ prescale.scale[2] = 1.0;
+ prescale.scale[3] = 1.0;
+ prescale.translate[0] = -1.0f;
+ prescale.translate[1] = 1.0f;
+ prescale.translate[2] = 0;
+ prescale.translate[3] = 0;
+ prescale.enabled = TRUE;
+ } else {
+
+ /* Examine gallium viewport transformation and produce a screen
+ * rectangle and possibly vertex shader pre-transformation to
+ * get the same results.
+ */
+ float fx = viewport->scale[0] * -1.0 + viewport->translate[0];
+ float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1];
+ float fw = viewport->scale[0] * 2;
+ float fh = flip * viewport->scale[1] * 2;
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "\ninitial %f,%f %fx%f\n",
+ fx,
+ fy,
+ fw,
+ fh);
+
+ prescale.scale[0] = 1.0;
+ prescale.scale[1] = 1.0;
+ prescale.scale[2] = 1.0;
+ prescale.scale[3] = 1.0;
+ prescale.translate[0] = 0;
+ prescale.translate[1] = 0;
+ prescale.translate[2] = 0;
+ prescale.translate[3] = 0;
+ prescale.enabled = TRUE;
+
+
+
+ if (fw < 0) {
+ prescale.scale[0] *= -1.0;
+ prescale.translate[0] += -fw;
+ fw = -fw;
+ fx = viewport->scale[0] * 1.0 + viewport->translate[0];
+ }
+
+ if (fh < 0) {
+ prescale.scale[1] *= -1.0;
+ prescale.translate[1] += -fh;
+ fh = -fh;
+ fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1];
+ }
+
+ if (fx < 0) {
+ prescale.translate[0] += fx;
+ prescale.scale[0] *= fw / (fw + fx);
+ fw += fx;
+ fx = 0;
+ }
+
+ if (fy < 0) {
+ prescale.translate[1] += fy;
+ prescale.scale[1] *= fh / (fh + fy);
+ fh += fy;
+ fy = 0;
+ }
+
+ if (fx + fw > fb_width) {
+ prescale.scale[0] *= fw / (fb_width - fx);
+ prescale.translate[0] -= fx * (fw / (fb_width - fx));
+ prescale.translate[0] += fx;
+ fw = fb_width - fx;
+
+ }
+
+ if (fy + fh > fb_height) {
+ prescale.scale[1] *= fh / (fb_height - fy);
+ prescale.translate[1] -= fy * (fh / (fb_height - fy));
+ prescale.translate[1] += fy;
+ fh = fb_height - fy;
+ }
+
+ if (fw < 0 || fh < 0) {
+ fw = fh = fx = fy = 0;
+ degenerate = TRUE;
+ goto out;
+ }
+
+
+ /* D3D viewport is integer space. Convert fx,fy,etc. to
+ * integers.
+ *
+ * TODO: adjust pretranslate correct for any subpixel error
+ * introduced converting to integers.
+ */
+ rect.x = fx;
+ rect.y = fy;
+ rect.w = fw;
+ rect.h = fh;
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "viewport error %f,%f %fx%f\n",
+ fabs((float)rect.x - fx),
+ fabs((float)rect.y - fy),
+ fabs((float)rect.w - fw),
+ fabs((float)rect.h - fh));
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "viewport %d,%d %dx%d\n",
+ rect.x,
+ rect.y,
+ rect.w,
+ rect.h);
+
+
+ /* Finally, to get GL rasterization rules, need to tweak the
+ * screen-space coordinates slightly relative to D3D which is
+ * what hardware implements natively.
+ */
+ if (svga->curr.rast->templ.gl_rasterization_rules) {
+ float adjust_x = 0.0;
+ float adjust_y = 0.0;
+
+ switch (svga->curr.reduced_prim) {
+ case PIPE_PRIM_LINES:
+ adjust_x = -0.5;
+ adjust_y = 0;
+ break;
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_TRIANGLES:
+ adjust_x = -0.375;
+ adjust_y = -0.5;
+ break;
+ }
+
+ prescale.translate[0] += adjust_x;
+ prescale.translate[1] += adjust_y;
+ prescale.translate[2] = 0.5; /* D3D clip space */
+ prescale.scale[2] = 0.5; /* D3D clip space */
+ }
+
+
+ range_min = viewport->scale[2] * -1.0 + viewport->translate[2];
+ range_max = viewport->scale[2] * 1.0 + viewport->translate[2];
+
+ /* D3D (and by implication SVGA) doesn't like dealing with zmax
+ * less than zmin. Detect that case, flip the depth range and
+ * invert our z-scale factor to achieve the same effect.
+ */
+ if (range_min > range_max) {
+ float range_tmp;
+ range_tmp = range_min;
+ range_min = range_max;
+ range_max = range_tmp;
+ prescale.scale[2] = -prescale.scale[2];
+ }
+ }
+
+ if (prescale.enabled) {
+ float H[2];
+ float J[2];
+ int i;
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "prescale %f,%f %fx%f\n",
+ prescale.translate[0],
+ prescale.translate[1],
+ prescale.scale[0],
+ prescale.scale[1]);
+
+ H[0] = (float)rect.w / 2.0;
+ H[1] = -(float)rect.h / 2.0;
+ J[0] = (float)rect.x + (float)rect.w / 2.0;
+ J[1] = (float)rect.y + (float)rect.h / 2.0;
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "H %f,%f\n"
+ "J %fx%f\n",
+ H[0],
+ H[1],
+ J[0],
+ J[1]);
+
+ /* Adjust prescale to take into account the fact that it is
+ * going to be applied prior to the perspective divide and
+ * viewport transformation.
+ *
+ * Vwin = H(Vc/Vc.w) + J
+ *
+ * We want to tweak Vwin with scale and translation from above,
+ * as in:
+ *
+ * Vwin' = S Vwin + T
+ *
+ * But we can only modify the values at Vc. Plugging all the
+ * above together, and rearranging, eventually we get:
+ *
+ * Vwin' = H(Vc'/Vc'.w) + J
+ * where:
+ * Vc' = SVc + KVc.w
+ * K = (T + (S-1)J) / H
+ *
+ * Overwrite prescale.translate with values for K:
+ */
+ for (i = 0; i < 2; i++) {
+ prescale.translate[i] = ((prescale.translate[i] +
+ (prescale.scale[i] - 1.0) * J[i]) / H[i]);
+ }
+
+ SVGA_DBG(DEBUG_VIEWPORT,
+ "clipspace %f,%f %fx%f\n",
+ prescale.translate[0],
+ prescale.translate[1],
+ prescale.scale[0],
+ prescale.scale[1]);
+ }
+
+out:
+ if (degenerate) {
+ rect.x = 0;
+ rect.y = 0;
+ rect.w = 1;
+ rect.h = 1;
+ prescale.enabled = FALSE;
+ }
+
+ if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+ ret = SVGA3D_SetViewport(svga->swc, &rect);
+ if(ret != PIPE_OK)
+ return ret;
+
+ memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
+ assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+ }
+
+ if (svga->state.hw_clear.depthrange.zmin != range_min ||
+ svga->state.hw_clear.depthrange.zmax != range_max)
+ {
+ ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+ if(ret != PIPE_OK)
+ return ret;
+
+ svga->state.hw_clear.depthrange.zmin = range_min;
+ svga->state.hw_clear.depthrange.zmax = range_max;
+ }
+
+ if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) {
+ svga->dirty |= SVGA_NEW_PRESCALE;
+ svga->state.hw_clear.prescale = prescale;
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_hw_viewport =
+{
+ "hw viewport state",
+ ( SVGA_NEW_FRAME_BUFFER |
+ SVGA_NEW_VIEWPORT |
+ SVGA_NEW_RAST |
+ SVGA_NEW_REDUCED_PRIMITIVE ),
+ emit_viewport
+};
+
+
+/***********************************************************************
+ * Scissor state
+ */
+static int emit_scissor_rect( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct pipe_scissor_state *scissor = &svga->curr.scissor;
+ SVGA3dRect rect;
+
+ rect.x = scissor->minx;
+ rect.y = scissor->miny;
+ rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+ rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+
+ return SVGA3D_SetScissorRect(svga->swc, &rect);
+}
+
+
+struct svga_tracked_state svga_hw_scissor =
+{
+ "hw scissor state",
+ SVGA_NEW_SCISSOR,
+ emit_scissor_rect
+};
+
+
+/***********************************************************************
+ * Userclip state
+ */
+
+static int emit_clip_planes( struct svga_context *svga,
+ unsigned dirty )
+{
+ unsigned i;
+ enum pipe_error ret;
+
+ /* TODO: just emit directly from svga_set_clip_state()?
+ */
+ for (i = 0; i < svga->curr.clip.nr; i++) {
+ ret = SVGA3D_SetClipPlane( svga->swc,
+ i,
+ svga->curr.clip.ucp[i] );
+ if(ret != PIPE_OK)
+ return ret;
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_hw_clip_planes =
+{
+ "hw viewport state",
+ SVGA_NEW_CLIP,
+ emit_clip_planes
+};
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
new file mode 100644
index 00000000000..bba80a93a56
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -0,0 +1,302 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_bitmask.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+
+
+static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
+ const struct svga_fs_compile_key *b )
+{
+ unsigned keysize_a = svga_fs_key_size( a );
+ unsigned keysize_b = svga_fs_key_size( b );
+
+ if (keysize_a != keysize_b) {
+ return (int)(keysize_a - keysize_b);
+ }
+ return memcmp( a, b, keysize_a );
+}
+
+
+static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs,
+ const struct svga_fs_compile_key *key )
+{
+ struct svga_shader_result *result = fs->base.results;
+
+ assert(key);
+
+ for ( ; result; result = result->next) {
+ if (compare_fs_keys( key, &result->key.fkey ) == 0)
+ return result;
+ }
+
+ return NULL;
+}
+
+
+static enum pipe_error compile_fs( struct svga_context *svga,
+ struct svga_fragment_shader *fs,
+ const struct svga_fs_compile_key *key,
+ struct svga_shader_result **out_result )
+{
+ struct svga_shader_result *result;
+ enum pipe_error ret;
+
+ result = svga_translate_fragment_program( fs, key );
+ if (result == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ result->id = util_bitmask_add(svga->fs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ goto fail;
+
+ ret = SVGA3D_DefineShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_PS,
+ result->tokens,
+ result->nr_tokens * sizeof result->tokens[0]);
+ if (ret)
+ goto fail;
+
+ *out_result = result;
+ result->next = fs->base.results;
+ fs->base.results = result;
+ return PIPE_OK;
+
+fail:
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->fs_bm, result->id );
+ svga_destroy_shader_result( result );
+ }
+ return ret;
+}
+
+/* The blend workaround for simulating logicop xor behaviour requires
+ * that the incoming fragment color be white. This change achieves
+ * that by hooking up a hard-wired fragment shader that just emits
+ * color 1,1,1,1
+ *
+ * This is a slightly incomplete solution as it assumes that the
+ * actual bound shader has no other effects beyond generating a
+ * fragment color. In particular shaders containing TEXKIL and/or
+ * depth-write will not have the correct behaviour, nor will those
+ * expecting to use alphatest.
+ *
+ * These are avoidable issues, but they are not much worse than the
+ * unavoidable ones associated with this technique, so it's not clear
+ * how much effort should be expended trying to resolve them - the
+ * ultimate result will still not be correct in most cases.
+ *
+ * Shader below was generated with:
+ * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt
+ */
+static int emit_white_fs( struct svga_context *svga )
+{
+ int ret = PIPE_ERROR;
+
+ /* ps_3_0
+ * def c0, 1.000000, 0.000000, 0.000000, 1.000000
+ * mov oC0, c0.x
+ * end
+ */
+ static const unsigned white_tokens[] = {
+ 0xffff0300,
+ 0x05000051,
+ 0xa00f0000,
+ 0x3f800000,
+ 0x00000000,
+ 0x00000000,
+ 0x3f800000,
+ 0x02000001,
+ 0x800f0800,
+ 0xa0000000,
+ 0x0000ffff,
+ };
+
+ assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX);
+ svga->state.white_fs_id = util_bitmask_add(svga->fs_bm);
+ if(svga->state.white_fs_id == SVGA3D_INVALID_ID)
+ goto no_fs_id;
+
+ ret = SVGA3D_DefineShader(svga->swc,
+ svga->state.white_fs_id,
+ SVGA3D_SHADERTYPE_PS,
+ white_tokens,
+ sizeof(white_tokens));
+ if (ret)
+ goto no_definition;
+
+ return 0;
+
+no_definition:
+ util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id);
+ svga->state.white_fs_id = SVGA3D_INVALID_ID;
+no_fs_id:
+ return ret;
+}
+
+
+/* SVGA_NEW_TEXTURE_BINDING
+ * SVGA_NEW_RAST
+ * SVGA_NEW_NEED_SWTNL
+ * SVGA_NEW_SAMPLER
+ */
+static int make_fs_key( const struct svga_context *svga,
+ struct svga_fs_compile_key *key )
+{
+ int i;
+ int idx = 0;
+
+ memset(key, 0, sizeof *key);
+
+ /* Only need fragment shader fixup for twoside lighting if doing
+ * hwtnl. Otherwise the draw module does the whole job for us.
+ *
+ * SVGA_NEW_SWTNL
+ */
+ if (!svga->state.sw.need_swtnl) {
+ /* SVGA_NEW_RAST
+ */
+ key->light_twoside = svga->curr.rast->templ.light_twoside;
+ key->front_cw = (svga->curr.rast->templ.front_winding ==
+ PIPE_WINDING_CW);
+ }
+
+
+ /* XXX: want to limit this to the textures that the shader actually
+ * refers to.
+ *
+ * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+ */
+ for (i = 0; i < svga->curr.num_textures; i++) {
+ if (svga->curr.texture[i]) {
+ assert(svga->curr.sampler[i]);
+ key->tex[i].texture_target = svga->curr.texture[i]->target;
+ if (!svga->curr.sampler[i]->normalized_coords) {
+ key->tex[i].width_height_idx = idx++;
+ key->tex[i].unnormalized = TRUE;
+ ++key->num_unnormalized_coords;
+ }
+ }
+ }
+ key->num_textures = svga->curr.num_textures;
+
+ idx = 0;
+ for (i = 0; i < svga->curr.num_samplers; ++i) {
+ if (svga->curr.sampler[i]) {
+ key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
+ key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
+ }
+ }
+
+ return 0;
+}
+
+
+
+static int emit_hw_fs( struct svga_context *svga,
+ unsigned dirty )
+{
+ struct svga_shader_result *result = NULL;
+ unsigned id = SVGA3D_INVALID_ID;
+ int ret = 0;
+
+ /* SVGA_NEW_BLEND
+ */
+ if (svga->curr.blend->need_white_fragments) {
+ if (svga->state.white_fs_id == SVGA3D_INVALID_ID) {
+ ret = emit_white_fs( svga );
+ if (ret)
+ return ret;
+ }
+ id = svga->state.white_fs_id;
+ }
+ else {
+ struct svga_fragment_shader *fs = svga->curr.fs;
+ struct svga_fs_compile_key key;
+
+ /* SVGA_NEW_TEXTURE_BINDING
+ * SVGA_NEW_RAST
+ * SVGA_NEW_NEED_SWTNL
+ * SVGA_NEW_SAMPLER
+ */
+ ret = make_fs_key( svga, &key );
+ if (ret)
+ return ret;
+
+ result = search_fs_key( fs, &key );
+ if (!result) {
+ ret = compile_fs( svga, fs, &key, &result );
+ if (ret)
+ return ret;
+ }
+
+ assert (result);
+ id = result->id;
+ }
+
+ assert(id != SVGA3D_INVALID_ID);
+
+ if (result != svga->state.hw_draw.fs) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_PS,
+ id );
+ if (ret)
+ return ret;
+
+ svga->dirty |= SVGA_NEW_FS_RESULT;
+ svga->state.hw_draw.fs = result;
+ }
+
+ return 0;
+}
+
+struct svga_tracked_state svga_hw_fs =
+{
+ "fragment shader (hwtnl)",
+ (SVGA_NEW_FS |
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_NEED_SWTNL |
+ SVGA_NEW_RAST |
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_BLEND),
+ emit_hw_fs
+};
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
new file mode 100644
index 00000000000..3c35a8579f7
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -0,0 +1,200 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_debug.h"
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+static INLINE SVGA3dDeclType
+svga_translate_vertex_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R32_FLOAT: return SVGA3D_DECLTYPE_FLOAT1;
+ case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2;
+ case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4;
+ case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR;
+ case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4;
+ case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4;
+ case PIPE_FORMAT_R8G8B8A8_UNORM: return SVGA3D_DECLTYPE_UBYTE4N;
+ case PIPE_FORMAT_R16G16_SNORM: return SVGA3D_DECLTYPE_SHORT2N;
+ case PIPE_FORMAT_R16G16B16A16_SNORM: return SVGA3D_DECLTYPE_SHORT4N;
+ case PIPE_FORMAT_R16G16_UNORM: return SVGA3D_DECLTYPE_USHORT2N;
+ case PIPE_FORMAT_R16G16B16A16_UNORM: return SVGA3D_DECLTYPE_USHORT4N;
+
+ /* These formats don't exist yet:
+ *
+ case PIPE_FORMAT_R10G10B10_USCALED: return SVGA3D_DECLTYPE_UDEC3;
+ case PIPE_FORMAT_R10G10B10_SNORM: return SVGA3D_DECLTYPE_DEC3N;
+ case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4;
+ */
+
+ default:
+ /* There are many formats without hardware support. This case
+ * will be hit regularly, meaning we'll need swvfetch.
+ */
+ return SVGA3D_DECLTYPE_MAX;
+ }
+}
+
+
+static int update_need_swvfetch( struct svga_context *svga,
+ unsigned dirty )
+{
+ unsigned i;
+ boolean need_swvfetch = FALSE;
+
+ for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+ svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format);
+ if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
+ need_swvfetch = TRUE;
+ break;
+ }
+ }
+
+ if (need_swvfetch != svga->state.sw.need_swvfetch) {
+ svga->state.sw.need_swvfetch = need_swvfetch;
+ svga->dirty |= SVGA_NEW_NEED_SWVFETCH;
+ }
+
+ return 0;
+}
+
+struct svga_tracked_state svga_update_need_swvfetch =
+{
+ "update need_swvfetch",
+ ( SVGA_NEW_VELEMENT ),
+ update_need_swvfetch
+};
+
+
+/***********************************************************************
+ */
+
+static int update_need_pipeline( struct svga_context *svga,
+ unsigned dirty )
+{
+
+ boolean need_pipeline = FALSE;
+ struct svga_vertex_shader *vs = svga->curr.vs;
+
+ /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
+ */
+ if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) {
+ SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n",
+ __FUNCTION__,
+ svga->curr.rast->need_pipeline,
+ (1 << svga->curr.reduced_prim) );
+ need_pipeline = TRUE;
+ }
+
+ /* EDGEFLAGS
+ */
+ if (vs->base.info.writes_edgeflag) {
+ SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
+ need_pipeline = TRUE;
+ }
+
+ /* SVGA_NEW_CLIP
+ */
+ if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport &&
+ svga->curr.clip.nr) {
+ SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__);
+ need_pipeline = TRUE;
+ }
+
+ if (need_pipeline != svga->state.sw.need_pipeline) {
+ svga->state.sw.need_pipeline = need_pipeline;
+ svga->dirty |= SVGA_NEW_NEED_PIPELINE;
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_update_need_pipeline =
+{
+ "need pipeline",
+ (SVGA_NEW_RAST |
+ SVGA_NEW_CLIP |
+ SVGA_NEW_VS |
+ SVGA_NEW_REDUCED_PRIMITIVE),
+ update_need_pipeline
+};
+
+
+/***********************************************************************
+ */
+
+static int update_need_swtnl( struct svga_context *svga,
+ unsigned dirty )
+{
+ boolean need_swtnl;
+
+ if (svga->debug.no_swtnl) {
+ svga->state.sw.need_swvfetch = 0;
+ svga->state.sw.need_pipeline = 0;
+ }
+
+ need_swtnl = (svga->state.sw.need_swvfetch ||
+ svga->state.sw.need_pipeline);
+
+ if (svga->debug.force_swtnl) {
+ need_swtnl = 1;
+ }
+
+ if (need_swtnl != svga->state.sw.need_swtnl) {
+ SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
+ "%s need_swvfetch: %s, need_pipeline %s\n",
+ __FUNCTION__,
+ svga->state.sw.need_swvfetch ? "true" : "false",
+ svga->state.sw.need_pipeline ? "true" : "false");
+
+ svga->state.sw.need_swtnl = need_swtnl;
+ svga->dirty |= SVGA_NEW_NEED_SWTNL;
+ svga->swtnl.new_vdecl = TRUE;
+ }
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_update_need_swtnl =
+{
+ "need swtnl",
+ (SVGA_NEW_NEED_PIPELINE |
+ SVGA_NEW_NEED_SWVFETCH),
+ update_need_swtnl
+};
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
new file mode 100644
index 00000000000..8b6803a285a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -0,0 +1,268 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+
+struct rs_queue {
+ unsigned rs_count;
+ SVGA3dRenderState rs[SVGA3D_RS_MAX];
+};
+
+
+#define EMIT_RS(svga, value, token, fail) \
+do { \
+ if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \
+ svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \
+ svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \
+ } \
+} while (0)
+
+#define EMIT_RS_FLOAT(svga, fvalue, token, fail) \
+do { \
+ unsigned value = fui(fvalue); \
+ if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) { \
+ svga_queue_rs( &queue, SVGA3D_RS_##token, value ); \
+ svga->state.hw_draw.rs[SVGA3D_RS_##token] = value; \
+ } \
+} while (0)
+
+
+static INLINE void
+svga_queue_rs( struct rs_queue *q,
+ unsigned rss,
+ unsigned value )
+{
+ q->rs[q->rs_count].state = rss;
+ q->rs[q->rs_count].uintValue = value;
+ q->rs_count++;
+}
+
+
+/* Compare old and new render states and emit differences between them
+ * to hardware. Simplest implementation would be to emit the whole of
+ * the "to" state.
+ */
+static int emit_rss( struct svga_context *svga,
+ unsigned dirty )
+{
+ struct rs_queue queue;
+
+ queue.rs_count = 0;
+
+ if (dirty & SVGA_NEW_BLEND) {
+ const struct svga_blend_state *curr = svga->curr.blend;
+
+ EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
+ EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail );
+
+ if (curr->rt[0].blend_enable) {
+ EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail );
+ EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail );
+ EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail );
+
+ EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable,
+ SEPARATEALPHABLENDENABLE, fail );
+
+ if (curr->rt[0].separate_alpha_blend_enable) {
+ EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail );
+ EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail );
+ EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail );
+ }
+ }
+ }
+
+
+ if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+ const struct svga_depth_stencil_state *curr = svga->curr.depth;
+ const struct svga_rasterizer_state *rast = svga->curr.rast;
+
+ if (!curr->stencil[0].enabled)
+ {
+ /* Stencil disabled
+ */
+ EMIT_RS( svga, FALSE, STENCILENABLE, fail );
+ EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+ }
+ else if (curr->stencil[0].enabled && !curr->stencil[1].enabled)
+ {
+ /* Regular stencil
+ */
+ EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+ EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+
+ EMIT_RS( svga, curr->stencil[0].func, STENCILFUNC, fail );
+ EMIT_RS( svga, curr->stencil[0].fail, STENCILFAIL, fail );
+ EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail );
+ EMIT_RS( svga, curr->stencil[0].pass, STENCILPASS, fail );
+
+ EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+ EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+ EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+ }
+ else
+ {
+ int cw, ccw;
+
+ /* Hardware frontwinding is always CW, so if ours is also CW,
+ * then our definition of front face agrees with hardware.
+ * Otherwise need to flip.
+ */
+ if (rast->templ.front_winding == PIPE_WINDING_CW) {
+ cw = 0;
+ ccw = 1;
+ }
+ else {
+ cw = 1;
+ ccw = 0;
+ }
+
+ /* Twoside stencil
+ */
+ EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+ EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail );
+
+ EMIT_RS( svga, curr->stencil[cw].func, STENCILFUNC, fail );
+ EMIT_RS( svga, curr->stencil[cw].fail, STENCILFAIL, fail );
+ EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail );
+ EMIT_RS( svga, curr->stencil[cw].pass, STENCILPASS, fail );
+
+ EMIT_RS( svga, curr->stencil[ccw].func, CCWSTENCILFUNC, fail );
+ EMIT_RS( svga, curr->stencil[ccw].fail, CCWSTENCILFAIL, fail );
+ EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail );
+ EMIT_RS( svga, curr->stencil[ccw].pass, CCWSTENCILPASS, fail );
+
+ EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+ EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+ EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+ }
+
+ EMIT_RS( svga, curr->zenable, ZENABLE, fail );
+ if (curr->zenable) {
+ EMIT_RS( svga, curr->zfunc, ZFUNC, fail );
+ EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail );
+ }
+
+ EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail );
+ if (curr->alphatestenable) {
+ EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail );
+ EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail );
+ }
+ }
+
+
+ if (dirty & SVGA_NEW_RAST)
+ {
+ const struct svga_rasterizer_state *curr = svga->curr.rast;
+
+ /* Shademode: still need to rearrange index list to move
+ * flat-shading PV first vertex.
+ */
+ EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
+ EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+ EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
+ EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
+ EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
+ EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail );
+ EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail );
+ EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail );
+ EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail );
+ }
+
+ if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE))
+ {
+ const struct svga_rasterizer_state *curr = svga->curr.rast;
+ float slope = 0.0;
+ float bias = 0.0;
+
+ /* Need to modify depth bias according to bound depthbuffer
+ * format. Don't do hardware depthbias while the software
+ * pipeline is active.
+ */
+ if (!svga->state.sw.need_pipeline &&
+ svga->curr.framebuffer.zsbuf)
+ {
+ slope = curr->slopescaledepthbias;
+ bias = svga->curr.depthscale * curr->depthbias;
+ }
+
+ EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail );
+ EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail );
+ }
+
+
+ if (queue.rs_count) {
+ SVGA3dRenderState *rs;
+
+ if (SVGA3D_BeginSetRenderState( svga->swc,
+ &rs,
+ queue.rs_count ) != PIPE_OK)
+ goto fail;
+
+ memcpy( rs,
+ queue.rs,
+ queue.rs_count * sizeof queue.rs[0]);
+
+ SVGA_FIFOCommitAll( svga->swc );
+ }
+
+ /* Also blend color:
+ */
+
+ return 0;
+
+fail:
+ /* XXX: need to poison cached hardware state on failure to ensure
+ * dirty state gets re-emitted. Fix this by re-instating partial
+ * FIFOCommit command and only updating cached hw state once the
+ * initial allocation has succeeded.
+ */
+ memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs));
+
+ return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_rss =
+{
+ "hw rss state",
+
+ (SVGA_NEW_BLEND |
+ SVGA_NEW_DEPTH_STENCIL |
+ SVGA_NEW_RAST |
+ SVGA_NEW_FRAME_BUFFER |
+ SVGA_NEW_NEED_PIPELINE),
+
+ emit_rss
+};
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
new file mode 100644
index 00000000000..b3137945202
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -0,0 +1,279 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_screen_texture.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+void svga_cleanup_tss_binding(struct svga_context *svga)
+{
+ int i;
+ unsigned count = MAX2( svga->curr.num_textures,
+ svga->state.hw_draw.num_views );
+
+ for (i = 0; i < count; i++) {
+ struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+ svga_sampler_view_reference(&view->v, NULL);
+ pipe_texture_reference( &svga->curr.texture[i], NULL );
+ pipe_texture_reference( &view->texture, NULL );
+
+ view->dirty = 1;
+ }
+}
+
+
+static int
+update_tss_binding(struct svga_context *svga,
+ unsigned dirty )
+{
+ unsigned i;
+ unsigned count = MAX2( svga->curr.num_textures,
+ svga->state.hw_draw.num_views );
+ unsigned min_lod;
+ unsigned max_lod;
+
+
+ struct {
+ struct {
+ unsigned unit;
+ struct svga_hw_view_state *view;
+ } bind[PIPE_MAX_SAMPLERS];
+
+ unsigned bind_count;
+ } queue;
+
+ queue.bind_count = 0;
+
+ for (i = 0; i < count; i++) {
+ const struct svga_sampler_state *s = svga->curr.sampler[i];
+ struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+ /* get min max lod */
+ if (svga->curr.texture[i]) {
+ min_lod = MAX2(s->view_min_lod, 0);
+ max_lod = MIN2(s->view_max_lod, svga->curr.texture[i]->last_level);
+ } else {
+ min_lod = 0;
+ max_lod = 0;
+ }
+
+ if (view->texture != svga->curr.texture[i] ||
+ view->min_lod != min_lod ||
+ view->max_lod != max_lod) {
+
+ svga_sampler_view_reference(&view->v, NULL);
+ pipe_texture_reference( &view->texture, svga->curr.texture[i] );
+
+ view->dirty = TRUE;
+ view->min_lod = min_lod;
+ view->max_lod = max_lod;
+
+ if (svga->curr.texture[i])
+ view->v = svga_get_tex_sampler_view(&svga->pipe,
+ svga->curr.texture[i],
+ min_lod,
+ max_lod);
+ }
+
+ if (view->dirty) {
+ queue.bind[queue.bind_count].unit = i;
+ queue.bind[queue.bind_count].view = view;
+ queue.bind_count++;
+ }
+ else if (view->v) {
+ svga_validate_sampler_view(svga, view->v);
+ }
+ }
+
+ svga->state.hw_draw.num_views = svga->curr.num_textures;
+
+ if (queue.bind_count) {
+ SVGA3dTextureState *ts;
+
+ if (SVGA3D_BeginSetTextureState( svga->swc,
+ &ts,
+ queue.bind_count ) != PIPE_OK)
+ goto fail;
+
+ for (i = 0; i < queue.bind_count; i++) {
+ ts[i].stage = queue.bind[i].unit;
+ ts[i].name = SVGA3D_TS_BIND_TEXTURE;
+
+ if (queue.bind[i].view->v) {
+ svga->swc->surface_relocation(svga->swc,
+ &ts[i].value,
+ queue.bind[i].view->v->handle,
+ PIPE_BUFFER_USAGE_GPU_READ);
+ }
+ else {
+ ts[i].value = SVGA3D_INVALID_ID;
+ }
+
+ queue.bind[i].view->dirty = FALSE;
+ }
+
+ SVGA_FIFOCommitAll( svga->swc );
+ }
+
+ return 0;
+
+fail:
+ return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_tss_binding = {
+ "texture binding emit",
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_SAMPLER,
+ update_tss_binding
+};
+
+
+/***********************************************************************
+ */
+
+struct ts_queue {
+ unsigned ts_count;
+ SVGA3dTextureState ts[PIPE_MAX_SAMPLERS*SVGA3D_TS_MAX];
+};
+
+
+#define EMIT_TS(svga, unit, val, token, fail) \
+do { \
+ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
+ svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \
+ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
+ } \
+} while (0)
+
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail) \
+do { \
+ unsigned val = fui(fvalue); \
+ if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) { \
+ svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val ); \
+ svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val; \
+ } \
+} while (0)
+
+
+static INLINE void
+svga_queue_tss( struct ts_queue *q,
+ unsigned unit,
+ unsigned tss,
+ unsigned value )
+{
+ assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
+ q->ts[q->ts_count].stage = unit;
+ q->ts[q->ts_count].name = tss;
+ q->ts[q->ts_count].value = value;
+ q->ts_count++;
+}
+
+
+static int
+update_tss(struct svga_context *svga,
+ unsigned dirty )
+{
+ unsigned i;
+ struct ts_queue queue;
+
+ queue.ts_count = 0;
+ for (i = 0; i < svga->curr.num_samplers; i++) {
+ if (svga->curr.sampler[i]) {
+ const struct svga_sampler_state *curr = svga->curr.sampler[i];
+
+ EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
+ EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
+ EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
+ EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
+ EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
+ EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
+ EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
+ EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
+ EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
+ // TEXCOORDINDEX -- hopefully not needed
+
+ if (svga->curr.tex_flags.flag_1d & (1 << i)) {
+ debug_printf("wrap 1d tex %d\n", i);
+ EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
+ }
+ else
+ EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
+
+ if (svga->curr.tex_flags.flag_srgb & (1 << i))
+ EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
+ else
+ EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+
+ }
+ }
+
+ if (queue.ts_count) {
+ SVGA3dTextureState *ts;
+
+ if (SVGA3D_BeginSetTextureState( svga->swc,
+ &ts,
+ queue.ts_count ) != PIPE_OK)
+ goto fail;
+
+ memcpy( ts,
+ queue.ts,
+ queue.ts_count * sizeof queue.ts[0]);
+
+ SVGA_FIFOCommitAll( svga->swc );
+ }
+
+ return 0;
+
+fail:
+ /* XXX: need to poison cached hardware state on failure to ensure
+ * dirty state gets re-emitted. Fix this by re-instating partial
+ * FIFOCommit command and only updating cached hw state once the
+ * initial allocation has succeeded.
+ */
+ memset(svga->state.hw_draw.ts, 0xcd, sizeof(svga->state.hw_draw.ts));
+
+ return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_tss = {
+ "texture state emit",
+ (SVGA_NEW_SAMPLER |
+ SVGA_NEW_TEXTURE_FLAGS),
+ update_tss
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
new file mode 100644
index 00000000000..c534308f503
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -0,0 +1,182 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_tgsi.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+
+#include "svga_hw_reg.h"
+
+
+static int
+upload_user_buffers( struct svga_context *svga )
+{
+ enum pipe_error ret = PIPE_OK;
+ int i;
+ int nr;
+
+ if (0)
+ debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
+
+ nr = svga->curr.num_vertex_buffers;
+
+ for (i = 0; i < nr; i++)
+ {
+ if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
+ {
+ struct pipe_buffer *upload_buffer = NULL;
+ unsigned offset = /*svga->curr.vb[i].buffer_offset*/ 0;
+ unsigned size = svga->curr.vb[i].buffer->size /*- offset*/;
+ unsigned upload_offset;
+
+ ret = u_upload_buffer( svga->upload_vb,
+ offset,
+ size,
+ svga->curr.vb[i].buffer,
+ &upload_offset,
+ &upload_buffer );
+ if (ret)
+ return ret;
+
+ if (0)
+ debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
+ __FUNCTION__,
+ i,
+ svga->curr.vb[i].buffer,
+ upload_buffer, upload_offset, size);
+
+ /* Make sure we release the old buffer and end up with the
+ * correct refcount on the uploaded buffer.
+ */
+ pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL );
+ svga->curr.vb[i].buffer = upload_buffer;
+ svga->curr.vb[i].buffer_offset = upload_offset;
+ }
+ }
+
+ if (0)
+ debug_printf("%s: DONE\n", __FUNCTION__);
+
+ return ret;
+}
+
+
+/***********************************************************************
+ */
+
+
+static int emit_hw_vs_vdecl( struct svga_context *svga,
+ unsigned dirty )
+{
+ const struct pipe_vertex_element *ve = svga->curr.ve;
+ SVGA3dVertexDecl decl;
+ unsigned i;
+
+ assert(svga->curr.num_vertex_elements >=
+ svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
+
+ svga_hwtnl_reset_vdecl( svga->hwtnl,
+ svga->curr.num_vertex_elements );
+
+ for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+ const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+ unsigned usage, index;
+
+
+ svga_generate_vdecl_semantics( i, &usage, &index );
+
+ /* SVGA_NEW_VELEMENT
+ */
+ decl.identity.type = svga->state.sw.ve_format[i];
+ decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+ decl.identity.usage = usage;
+ decl.identity.usageIndex = index;
+ decl.array.stride = vb->stride;
+ decl.array.offset = (vb->buffer_offset +
+ ve[i].src_offset);
+
+ svga_hwtnl_vdecl( svga->hwtnl,
+ i,
+ &decl,
+ vb->buffer );
+ }
+
+ return 0;
+}
+
+
+static int emit_hw_vdecl( struct svga_context *svga,
+ unsigned dirty )
+{
+ int ret = 0;
+
+ /* SVGA_NEW_NEED_SWTNL
+ */
+ if (svga->state.sw.need_swtnl)
+ return 0; /* Do not emit during swtnl */
+
+ /* If we get to here, we know that we're going to draw. Upload
+ * userbuffers now and try to combine multiple userbuffers from
+ * multiple draw calls into a single host buffer for performance.
+ */
+ if (svga->curr.any_user_vertex_buffers &&
+ SVGA_COMBINE_USERBUFFERS)
+ {
+ ret = upload_user_buffers( svga );
+ if (ret)
+ return ret;
+
+ svga->curr.any_user_vertex_buffers = FALSE;
+ }
+
+ return emit_hw_vs_vdecl( svga, dirty );
+}
+
+
+struct svga_tracked_state svga_hw_vdecl =
+{
+ "hw vertex decl state (hwtnl version)",
+ ( SVGA_NEW_NEED_SWTNL |
+ SVGA_NEW_VELEMENT |
+ SVGA_NEW_VBUFFER |
+ SVGA_NEW_RAST |
+ SVGA_NEW_FS |
+ SVGA_NEW_VS ),
+ emit_hw_vdecl
+};
+
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
new file mode 100644
index 00000000000..ae1e77e7d44
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -0,0 +1,246 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_bitmask.h"
+#include "translate/translate.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+
+static INLINE int compare_vs_keys( const struct svga_vs_compile_key *a,
+ const struct svga_vs_compile_key *b )
+{
+ unsigned keysize = svga_vs_key_size( a );
+ return memcmp( a, b, keysize );
+}
+
+
+static struct svga_shader_result *search_vs_key( struct svga_vertex_shader *vs,
+ const struct svga_vs_compile_key *key )
+{
+ struct svga_shader_result *result = vs->base.results;
+
+ assert(key);
+
+ for ( ; result; result = result->next) {
+ if (compare_vs_keys( key, &result->key.vkey ) == 0)
+ return result;
+ }
+
+ return NULL;
+}
+
+
+static enum pipe_error compile_vs( struct svga_context *svga,
+ struct svga_vertex_shader *vs,
+ const struct svga_vs_compile_key *key,
+ struct svga_shader_result **out_result )
+{
+ struct svga_shader_result *result;
+ enum pipe_error ret = PIPE_OK;
+
+ result = svga_translate_vertex_program( vs, key );
+ if (result == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ result->id = util_bitmask_add(svga->vs_bm);
+ if(result->id == UTIL_BITMASK_INVALID_INDEX)
+ goto fail;
+
+ ret = SVGA3D_DefineShader(svga->swc,
+ result->id,
+ SVGA3D_SHADERTYPE_VS,
+ result->tokens,
+ result->nr_tokens * sizeof result->tokens[0]);
+ if (ret)
+ goto fail;
+
+ *out_result = result;
+ result->next = vs->base.results;
+ vs->base.results = result;
+ return PIPE_OK;
+
+fail:
+ if (result) {
+ if (result->id != UTIL_BITMASK_INVALID_INDEX)
+ util_bitmask_clear( svga->vs_bm, result->id );
+ svga_destroy_shader_result( result );
+ }
+ return ret;
+}
+
+/* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_ZERO_STRIDE
+ */
+static int make_vs_key( struct svga_context *svga,
+ struct svga_vs_compile_key *key )
+{
+ memset(key, 0, sizeof *key);
+ key->need_prescale = svga->state.hw_clear.prescale.enabled;
+ key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+ key->zero_stride_vertex_elements =
+ svga->curr.zero_stride_vertex_elements;
+ key->num_zero_stride_vertex_elements =
+ svga->curr.num_zero_stride_vertex_elements;
+ return 0;
+}
+
+
+
+static int emit_hw_vs( struct svga_context *svga,
+ unsigned dirty )
+{
+ struct svga_shader_result *result = NULL;
+ unsigned id = SVGA3D_INVALID_ID;
+ int ret = 0;
+
+ /* SVGA_NEW_NEED_SWTNL */
+ if (!svga->state.sw.need_swtnl) {
+ struct svga_vertex_shader *vs = svga->curr.vs;
+ struct svga_vs_compile_key key;
+
+ ret = make_vs_key( svga, &key );
+ if (ret)
+ return ret;
+
+ result = search_vs_key( vs, &key );
+ if (!result) {
+ ret = compile_vs( svga, vs, &key, &result );
+ if (ret)
+ return ret;
+ }
+
+ assert (result);
+ id = result->id;
+ }
+
+ if (result != svga->state.hw_draw.vs) {
+ ret = SVGA3D_SetShader(svga->swc,
+ SVGA3D_SHADERTYPE_VS,
+ id );
+ if (ret)
+ return ret;
+
+ svga->dirty |= SVGA_NEW_VS_RESULT;
+ svga->state.hw_draw.vs = result;
+ }
+
+ return 0;
+}
+
+struct svga_tracked_state svga_hw_vs =
+{
+ "vertex shader (hwtnl)",
+ (SVGA_NEW_VS |
+ SVGA_NEW_PRESCALE |
+ SVGA_NEW_NEED_SWTNL |
+ SVGA_NEW_ZERO_STRIDE),
+ emit_hw_vs
+};
+
+
+/***********************************************************************
+ */
+static int update_zero_stride( struct svga_context *svga,
+ unsigned dirty )
+{
+ unsigned i;
+
+ svga->curr.zero_stride_vertex_elements = 0;
+ svga->curr.num_zero_stride_vertex_elements = 0;
+
+ for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+ const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+ const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
+ vel->vertex_buffer_index];
+ if (vbuffer->stride == 0) {
+ unsigned const_idx =
+ svga->curr.num_zero_stride_vertex_elements;
+ struct translate *translate;
+ struct translate_key key;
+ void *mapped_buffer;
+
+ svga->curr.zero_stride_vertex_elements |= (1 << i);
+ ++svga->curr.num_zero_stride_vertex_elements;
+
+ key.output_stride = 4 * sizeof(float);
+ key.nr_elements = 1;
+ key.element[0].input_format = vel->src_format;
+ key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.element[0].input_buffer = vel->vertex_buffer_index;
+ key.element[0].input_offset = vel->src_offset;
+ key.element[0].output_offset = const_idx * 4 * sizeof(float);
+
+ translate_key_sanitize(&key);
+ /* translate_generic_create is technically private but
+ * we don't want to code-generate, just want generic
+ * translation */
+ translate = translate_generic_create(&key);
+
+ assert(vel->src_offset == 0);
+
+ mapped_buffer = pipe_buffer_map_range(svga->pipe.screen,
+ vbuffer->buffer,
+ vel->src_offset,
+ util_format_get_blocksize(vel->src_format),
+ PIPE_BUFFER_USAGE_CPU_READ);
+ translate->set_buffer(translate, vel->vertex_buffer_index,
+ mapped_buffer,
+ vbuffer->stride);
+ translate->run(translate, 0, 1,
+ svga->curr.zero_stride_constants);
+
+ pipe_buffer_unmap(svga->pipe.screen,
+ vbuffer->buffer);
+ translate->release(translate);
+ }
+ }
+
+ if (svga->curr.num_zero_stride_vertex_elements)
+ svga->dirty |= SVGA_NEW_ZERO_STRIDE;
+
+ return 0;
+}
+
+struct svga_tracked_state svga_hw_update_zero_stride =
+{
+ "update zero_stride",
+ ( SVGA_NEW_VELEMENT |
+ SVGA_NEW_VBUFFER ),
+ update_zero_stride
+};
diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h
new file mode 100644
index 00000000000..4882f26b170
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl.h
@@ -0,0 +1,52 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_H
+#define SVGA_SWTNL_H
+
+#include "pipe/p_compiler.h"
+
+struct svga_context;
+struct pipe_context;
+struct pipe_buffer;
+struct vbuf_render;
+
+
+boolean svga_init_swtnl( struct svga_context *svga );
+void svga_destroy_swtnl( struct svga_context *svga );
+
+
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim,
+ unsigned start,
+ unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
new file mode 100644
index 00000000000..b4f757a47a9
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -0,0 +1,349 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_vbuf.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+#include "svga_draw.h"
+#include "svga_swtnl_private.h"
+
+
+static const struct vertex_info *
+svga_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+
+ svga_swtnl_update_vdecl(svga);
+
+ return &svga_render->vertex_info;
+}
+
+
+static boolean
+svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
+ ushort vertex_size,
+ ushort nr_vertices )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+ struct pipe_screen *screen = svga->pipe.screen;
+ size_t size = (size_t)nr_vertices * (size_t)vertex_size;
+ boolean new_vbuf = FALSE;
+ boolean new_ibuf = FALSE;
+
+ if (svga_render->vertex_size != vertex_size)
+ svga->swtnl.new_vdecl = TRUE;
+ svga_render->vertex_size = (size_t)vertex_size;
+
+ if (svga->swtnl.new_vbuf)
+ new_ibuf = new_vbuf = TRUE;
+ svga->swtnl.new_vbuf = FALSE;
+
+ if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size)
+ new_vbuf = TRUE;
+
+ if (new_vbuf)
+ pipe_buffer_reference(&svga_render->vbuf, NULL);
+ if (new_ibuf)
+ pipe_buffer_reference(&svga_render->ibuf, NULL);
+
+ if (!svga_render->vbuf) {
+ svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
+ svga_render->vbuf = pipe_buffer_create(screen,
+ 0,
+ PIPE_BUFFER_USAGE_VERTEX,
+ svga_render->vbuf_size);
+ if(!svga_render->vbuf) {
+ svga_context_flush(svga, NULL);
+ svga_render->vbuf = pipe_buffer_create(screen,
+ 0,
+ PIPE_BUFFER_USAGE_VERTEX,
+ svga_render->vbuf_size);
+ assert(svga_render->vbuf);
+ }
+
+ svga->swtnl.new_vdecl = TRUE;
+ svga_render->vbuf_offset = 0;
+ } else {
+ svga_render->vbuf_offset += svga_render->vbuf_used;
+ }
+
+ svga_render->vbuf_used = 0;
+
+ if (svga->swtnl.new_vdecl)
+ svga_render->vdecl_offset = svga_render->vbuf_offset;
+
+ return TRUE;
+}
+
+static void *
+svga_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+ struct pipe_screen *screen = svga->pipe.screen;
+
+ char *ptr = (char*)pipe_buffer_map(screen,
+ svga_render->vbuf,
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_FLUSH_EXPLICIT);
+ return ptr + svga_render->vbuf_offset;
+}
+
+static void
+svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
+ ushort min_index,
+ ushort max_index )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+ struct pipe_screen *screen = svga->pipe.screen;
+ unsigned offset, length;
+ size_t used = svga_render->vertex_size * ((size_t)max_index + 1);
+
+ offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
+ length = svga_render->vertex_size * (max_index + 1 - min_index);
+ pipe_buffer_flush_mapped_range(screen, svga_render->vbuf, offset, length);
+ pipe_buffer_unmap(screen, svga_render->vbuf);
+ svga_render->min_index = min_index;
+ svga_render->max_index = max_index;
+ svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
+}
+
+static boolean
+svga_vbuf_render_set_primitive( struct vbuf_render *render,
+ unsigned prim )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ svga_render->prim = prim;
+
+ return TRUE;
+}
+
+static void
+svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render )
+{
+ struct svga_context *svga = svga_render->svga;
+ SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+ enum pipe_error ret;
+ int i;
+
+ /* if the vdecl or vbuf hasn't changed do nothing */
+ if (!svga->swtnl.new_vdecl)
+ return;
+
+ memcpy(vdecl, svga_render->vdecl, sizeof(vdecl));
+
+ /* flush the hw state */
+ ret = svga_hwtnl_flush(svga->hwtnl);
+ if (ret) {
+ svga_context_flush(svga, NULL);
+ ret = svga_hwtnl_flush(svga->hwtnl);
+ /* if we hit this path we might become synced with hw */
+ svga->swtnl.new_vbuf = TRUE;
+ assert(ret == 0);
+ }
+
+ svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
+
+ for (i = 0; i < svga_render->vdecl_count; i++) {
+ vdecl[i].array.offset += svga_render->vdecl_offset;
+
+ svga_hwtnl_vdecl( svga->hwtnl,
+ i,
+ &vdecl[i],
+ svga_render->vbuf );
+ }
+
+ /* We have already taken care of flatshading, so let the hwtnl
+ * module use whatever is most convenient:
+ */
+ if (svga->state.sw.need_pipeline) {
+ svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
+ svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+ }
+ else {
+ svga_hwtnl_set_flatshade( svga->hwtnl,
+ svga->curr.rast->templ.flatshade,
+ svga->curr.rast->templ.flatshade_first );
+
+ svga_hwtnl_set_unfilled( svga->hwtnl,
+ svga->curr.rast->hw_unfilled );
+ }
+
+ svga->swtnl.new_vdecl = FALSE;
+}
+
+static void
+svga_vbuf_render_draw_arrays( struct vbuf_render *render,
+ unsigned start,
+ uint nr )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+ unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
+ enum pipe_error ret = 0;
+
+ svga_vbuf_sumbit_state(svga_render);
+
+ /* Need to call update_state() again as the draw module may have
+ * altered some of our state behind our backs. Testcase:
+ * redbook/polys.c
+ */
+ svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+ ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+ svga->swtnl.new_vbuf = TRUE;
+ assert(ret == PIPE_OK);
+ }
+}
+
+
+static void
+svga_vbuf_render_draw( struct vbuf_render *render,
+ const ushort *indices,
+ uint nr_indices)
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+ struct svga_context *svga = svga_render->svga;
+ struct pipe_screen *screen = svga->pipe.screen;
+ unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
+ boolean ret;
+ size_t size = 2 * nr_indices;
+
+ assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
+
+ if (svga_render->ibuf_size < svga_render->ibuf_offset + size)
+ pipe_buffer_reference(&svga_render->ibuf, NULL);
+
+ if (!svga_render->ibuf) {
+ svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
+ svga_render->ibuf = pipe_buffer_create(screen,
+ 0,
+ PIPE_BUFFER_USAGE_VERTEX,
+ svga_render->ibuf_size);
+ svga_render->ibuf_offset = 0;
+ }
+
+ pipe_buffer_write(screen, svga_render->ibuf,
+ svga_render->ibuf_offset, 2 * nr_indices, indices);
+
+
+ /* off to hardware */
+ svga_vbuf_sumbit_state(svga_render);
+
+ /* Need to call update_state() again as the draw module may have
+ * altered some of our state behind our backs. Testcase:
+ * redbook/polys.c
+ */
+ svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+ ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+ svga_render->ibuf,
+ 2,
+ svga_render->min_index,
+ svga_render->max_index,
+ svga_render->prim,
+ svga_render->ibuf_offset / 2, nr_indices, bias);
+ if(ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+ svga_render->ibuf,
+ 2,
+ svga_render->min_index,
+ svga_render->max_index,
+ svga_render->prim,
+ svga_render->ibuf_offset / 2, nr_indices, bias);
+ svga->swtnl.new_vbuf = TRUE;
+ assert(ret == PIPE_OK);
+ }
+
+ svga_render->ibuf_offset += size;
+}
+
+
+static void
+svga_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+
+}
+
+
+static void
+svga_vbuf_render_destroy( struct vbuf_render *render )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+
+ pipe_buffer_reference(&svga_render->vbuf, NULL);
+ pipe_buffer_reference(&svga_render->ibuf, NULL);
+ FREE(svga_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga )
+{
+ struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render);
+
+ svga_render->svga = svga;
+ svga_render->ibuf_size = 0;
+ svga_render->vbuf_size = 0;
+ svga_render->ibuf_alloc_size = 4*1024;
+ svga_render->vbuf_alloc_size = 64*1024;
+ svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
+ svga_render->base.max_indices = 65536;
+ svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
+ svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices;
+ svga_render->base.map_vertices = svga_vbuf_render_map_vertices;
+ svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices;
+ svga_render->base.set_primitive = svga_vbuf_render_set_primitive;
+ svga_render->base.draw = svga_vbuf_render_draw;
+ svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays;
+ svga_render->base.release_vertices = svga_vbuf_render_release_vertices;
+ svga_render->base.destroy = svga_vbuf_render_destroy;
+
+ return &svga_render->base;
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
new file mode 100644
index 00000000000..7655121bec1
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -0,0 +1,170 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+#include "svga_swtnl_private.h"
+
+
+
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+ struct pipe_buffer *indexBuffer,
+ unsigned indexSize,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned prim, unsigned start, unsigned count)
+{
+ struct draw_context *draw = svga->swtnl.draw;
+ unsigned i;
+ const void *map;
+ enum pipe_error ret;
+
+ assert(!svga->dirty);
+ assert(svga->state.sw.need_swtnl);
+ assert(draw);
+
+ ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+ if (ret) {
+ svga_context_flush(svga, NULL);
+ ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+ svga->swtnl.new_vbuf = TRUE;
+ assert(ret == PIPE_OK);
+ }
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+ map = pipe_buffer_map(svga->pipe.screen,
+ svga->curr.vb[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ draw_set_mapped_vertex_buffer(draw, i, map);
+ }
+
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ map = pipe_buffer_map(svga->pipe.screen, indexBuffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+
+ draw_set_mapped_element_buffer_range(draw,
+ indexSize,
+ min_index,
+ max_index,
+ map);
+ }
+
+ if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+ map = pipe_buffer_map(svga->pipe.screen,
+ svga->curr.cb[PIPE_SHADER_VERTEX],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ assert(map);
+ draw_set_mapped_constant_buffer(
+ draw, PIPE_SHADER_VERTEX,
+ map,
+ svga->curr.cb[PIPE_SHADER_VERTEX]->size);
+ }
+
+ draw_arrays(svga->swtnl.draw, prim, start, count);
+
+ draw_flush(svga->swtnl.draw);
+
+ /* Ensure the draw module didn't touch this */
+ assert(i == svga->curr.num_vertex_buffers);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+ pipe_buffer_unmap(svga->pipe.screen, svga->curr.vb[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+
+ if (indexBuffer) {
+ pipe_buffer_unmap(svga->pipe.screen, indexBuffer);
+ draw_set_mapped_element_buffer(draw, 0, NULL);
+ }
+
+ if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+ pipe_buffer_unmap(svga->pipe.screen,
+ svga->curr.cb[PIPE_SHADER_VERTEX]);
+ }
+
+ return ret;
+}
+
+
+
+
+boolean svga_init_swtnl( struct svga_context *svga )
+{
+ svga->swtnl.backend = svga_vbuf_render_create(svga);
+ if(!svga->swtnl.backend)
+ goto fail;
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ svga->swtnl.draw = draw_create();
+ if (svga->swtnl.draw == NULL)
+ goto fail;
+
+
+ draw_set_rasterize_stage(svga->swtnl.draw,
+ draw_vbuf_stage( svga->swtnl.draw, svga->swtnl.backend ));
+
+ draw_set_render(svga->swtnl.draw, svga->swtnl.backend);
+
+ draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);
+ draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe);
+ draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
+
+ draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE));
+
+ return TRUE;
+
+fail:
+ if (svga->swtnl.backend)
+ svga->swtnl.backend->destroy( svga->swtnl.backend );
+
+ if (svga->swtnl.draw)
+ draw_destroy( svga->swtnl.draw );
+
+ return FALSE;
+}
+
+
+void svga_destroy_swtnl( struct svga_context *svga )
+{
+ draw_destroy( svga->swtnl.draw );
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
new file mode 100644
index 00000000000..9bbb42910f5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -0,0 +1,93 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_PRIVATE_H
+#define SVGA_SWTNL_PRIVATE_H
+
+#include "svga_swtnl.h"
+#include "draw/draw_vertex.h"
+
+#include "svga_types.h"
+#include "svga3d_reg.h"
+
+/**
+ * Primitive renderer for svga.
+ */
+struct svga_vbuf_render {
+ struct vbuf_render base;
+
+ struct svga_context *svga;
+ struct vertex_info vertex_info;
+
+ unsigned vertex_size;
+
+ unsigned prim;
+
+ struct pipe_buffer *vbuf;
+ struct pipe_buffer *ibuf;
+
+ /* current size of buffer */
+ size_t vbuf_size;
+ size_t ibuf_size;
+
+ /* size of that the buffer should be */
+ size_t vbuf_alloc_size;
+ size_t ibuf_alloc_size;
+
+ /* current write place */
+ size_t vbuf_offset;
+ size_t ibuf_offset;
+
+ /* currently used */
+ size_t vbuf_used;
+
+ SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+ unsigned vdecl_offset;
+ unsigned vdecl_count;
+
+ ushort min_index;
+ ushort max_index;
+};
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct svga_vbuf_render *
+svga_vbuf_render( struct vbuf_render *render )
+{
+ assert(render);
+ return (struct svga_vbuf_render *)render;
+}
+
+
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga );
+
+
+int
+svga_swtnl_update_vdecl( struct svga_context *svga );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
new file mode 100644
index 00000000000..94b6ccc62dd
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -0,0 +1,237 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+
+#include "svga_swtnl_private.h"
+
+
+#define SVGA_POINT_ADJ_X -0.375
+#define SVGA_POINT_ADJ_Y -0.5
+
+#define SVGA_LINE_ADJ_X -0.5
+#define SVGA_LINE_ADJ_Y -0.5
+
+#define SVGA_TRIANGLE_ADJ_X -0.375
+#define SVGA_TRIANGLE_ADJ_Y -0.5
+
+
+static void set_draw_viewport( struct svga_context *svga )
+{
+ struct pipe_viewport_state vp = svga->curr.viewport;
+ float adjx = 0;
+ float adjy = 0;
+
+ switch (svga->curr.reduced_prim) {
+ case PIPE_PRIM_POINTS:
+ adjx = SVGA_POINT_ADJ_X;
+ adjy = SVGA_POINT_ADJ_Y;
+ break;
+ case PIPE_PRIM_LINES:
+ /* XXX: This is to compensate for the fact that wide lines are
+ * going to be drawn with triangles, but we're not catching all
+ * cases where that will happen.
+ */
+ if (svga->curr.rast->templ.line_width > 1.0)
+ {
+ adjx = SVGA_LINE_ADJ_X + 0.175;
+ adjy = SVGA_LINE_ADJ_Y - 0.175;
+ }
+ else {
+ adjx = SVGA_LINE_ADJ_X;
+ adjy = SVGA_LINE_ADJ_Y;
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ adjx += SVGA_TRIANGLE_ADJ_X;
+ adjy += SVGA_TRIANGLE_ADJ_Y;
+ break;
+ }
+
+ vp.translate[0] += adjx;
+ vp.translate[1] += adjy;
+
+ draw_set_viewport_state(svga->swtnl.draw, &vp);
+}
+
+static int update_swtnl_draw( struct svga_context *svga,
+ unsigned dirty )
+{
+ draw_flush( svga->swtnl.draw );
+
+ if (dirty & SVGA_NEW_VS)
+ draw_bind_vertex_shader(svga->swtnl.draw,
+ svga->curr.vs->draw_shader);
+
+ if (dirty & SVGA_NEW_VBUFFER)
+ draw_set_vertex_buffers(svga->swtnl.draw,
+ svga->curr.num_vertex_buffers,
+ svga->curr.vb);
+
+ if (dirty & SVGA_NEW_VELEMENT)
+ draw_set_vertex_elements(svga->swtnl.draw,
+ svga->curr.num_vertex_elements,
+ svga->curr.ve );
+
+ if (dirty & SVGA_NEW_CLIP)
+ draw_set_clip_state(svga->swtnl.draw,
+ &svga->curr.clip);
+
+ if (dirty & (SVGA_NEW_VIEWPORT |
+ SVGA_NEW_REDUCED_PRIMITIVE |
+ SVGA_NEW_RAST))
+ set_draw_viewport( svga );
+
+ if (dirty & SVGA_NEW_RAST)
+ draw_set_rasterizer_state(svga->swtnl.draw,
+ &svga->curr.rast->templ);
+
+ if (dirty & SVGA_NEW_FRAME_BUFFER)
+ draw_set_mrd(svga->swtnl.draw,
+ svga->curr.depthscale);
+
+ return 0;
+}
+
+
+struct svga_tracked_state svga_update_swtnl_draw =
+{
+ "update draw module state",
+ (SVGA_NEW_VS |
+ SVGA_NEW_VBUFFER |
+ SVGA_NEW_VELEMENT |
+ SVGA_NEW_CLIP |
+ SVGA_NEW_VIEWPORT |
+ SVGA_NEW_RAST |
+ SVGA_NEW_FRAME_BUFFER |
+ SVGA_NEW_REDUCED_PRIMITIVE),
+ update_swtnl_draw
+};
+
+
+int svga_swtnl_update_vdecl( struct svga_context *svga )
+{
+ struct svga_vbuf_render *svga_render = svga_vbuf_render(svga->swtnl.backend);
+ struct draw_context *draw = svga->swtnl.draw;
+ struct vertex_info *vinfo = &svga_render->vertex_info;
+ SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+ const enum interp_mode colorInterp =
+ svga->curr.rast->templ.flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ const struct svga_fragment_shader *fs = svga->curr.fs;
+ int offset = 0;
+ int nr_decls = 0;
+ int src, i;
+
+ memset(vinfo, 0, sizeof(*vinfo));
+ memset(vdecl, 0, sizeof(vdecl));
+
+ /* always add position */
+ src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+ vinfo->attrib[0].emit = EMIT_4F;
+ vdecl[0].array.offset = offset;
+ vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+ vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
+ vdecl[0].identity.usageIndex = 0;
+ offset += 16;
+ nr_decls++;
+
+ for (i = 0; i < fs->base.info.num_inputs; i++) {
+ unsigned name = fs->base.info.input_semantic_name[i];
+ unsigned index = fs->base.info.input_semantic_index[i];
+ src = draw_find_shader_output(draw, name, index);
+ vdecl[nr_decls].array.offset = offset;
+ vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
+
+ switch (name) {
+ case TGSI_SEMANTIC_COLOR:
+ draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR;
+ vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+ offset += 16;
+ nr_decls++;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+ vdecl[nr_decls].identity.usageIndex += 1;
+ offset += 16;
+ nr_decls++;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1;
+ assert(vdecl[nr_decls].identity.usageIndex == 0);
+ offset += 4;
+ nr_decls++;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ /* generated internally, not a vertex shader output */
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ draw_compute_vertex_size(vinfo);
+
+ svga_render->vdecl_count = nr_decls;
+ for (i = 0; i < svga_render->vdecl_count; i++)
+ vdecl[i].array.stride = offset;
+
+ if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
+ return 0;
+
+ memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
+ svga->swtnl.new_vdecl = TRUE;
+
+ return 0;
+}
+
+
+static int update_swtnl_vdecl( struct svga_context *svga,
+ unsigned dirty )
+{
+ return svga_swtnl_update_vdecl( svga );
+}
+
+
+struct svga_tracked_state svga_update_swtnl_vdecl =
+{
+ "update draw module vdecl",
+ (SVGA_NEW_VS |
+ SVGA_NEW_FS),
+ update_swtnl_vdecl
+};
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
new file mode 100644
index 00000000000..0cd620189b7
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -0,0 +1,282 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_memory.h"
+#include "util/u_bitmask.h"
+
+#include "svgadump/svga_shader_dump.h"
+
+#include "svga_context.h"
+#include "svga_tgsi.h"
+#include "svga_tgsi_emit.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+/* Sinkhole used only in error conditions.
+ */
+static char err_buf[128];
+
+#if 0
+static void svga_destroy_shader_emitter( struct svga_shader_emitter *emit )
+{
+ if (emit->buf != err_buf)
+ FREE(emit->buf);
+}
+#endif
+
+
+static boolean svga_shader_expand( struct svga_shader_emitter *emit )
+{
+ char *new_buf;
+ unsigned newsize = emit->size * 2;
+
+ if(emit->buf != err_buf)
+ new_buf = REALLOC(emit->buf, emit->size, newsize);
+ else
+ new_buf = NULL;
+
+ if (new_buf == NULL) {
+ emit->ptr = err_buf;
+ emit->buf = err_buf;
+ emit->size = sizeof(err_buf);
+ return FALSE;
+ }
+
+ emit->size = newsize;
+ emit->ptr = new_buf + (emit->ptr - emit->buf);
+ emit->buf = new_buf;
+ return TRUE;
+}
+
+static INLINE boolean reserve( struct svga_shader_emitter *emit,
+ unsigned nr_dwords )
+{
+ if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) {
+ if (!svga_shader_expand( emit ))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+ unsigned dword )
+{
+ if (!reserve(emit, 1))
+ return FALSE;
+
+ *(unsigned *)emit->ptr = dword;
+ emit->ptr += sizeof dword;
+ return TRUE;
+}
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+ const unsigned *dwords,
+ unsigned nr )
+{
+ if (!reserve(emit, nr))
+ return FALSE;
+
+ memcpy( emit->ptr, dwords, nr * sizeof *dwords );
+ emit->ptr += nr * sizeof *dwords;
+ return TRUE;
+}
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+ unsigned opcode )
+{
+ SVGA3dShaderInstToken *here;
+
+ if (!reserve(emit, 1))
+ return FALSE;
+
+ here = (SVGA3dShaderInstToken *)emit->ptr;
+ here->value = opcode;
+
+ if (emit->insn_offset) {
+ SVGA3dShaderInstToken *prev = (SVGA3dShaderInstToken *)(emit->buf +
+ emit->insn_offset);
+ prev->size = (here - prev) - 1;
+ }
+
+ emit->insn_offset = emit->ptr - emit->buf;
+ emit->ptr += sizeof(unsigned);
+ return TRUE;
+}
+
+#define SVGA3D_PS_2X (SVGA3D_PS_20 | 1)
+#define SVGA3D_VS_2X (SVGA3D_VS_20 | 1)
+
+static boolean svga_shader_emit_header( struct svga_shader_emitter *emit )
+{
+ SVGA3dShaderVersion header;
+
+ memset( &header, 0, sizeof header );
+
+ switch (emit->unit) {
+ case PIPE_SHADER_FRAGMENT:
+ header.value = emit->use_sm30 ? SVGA3D_PS_30 : SVGA3D_PS_2X;
+ break;
+ case PIPE_SHADER_VERTEX:
+ header.value = emit->use_sm30 ? SVGA3D_VS_30 : SVGA3D_VS_2X;
+ break;
+ }
+
+ return svga_shader_emit_dword( emit, header.value );
+}
+
+
+
+
+
+/* Parse TGSI shader and translate to SVGA/DX9 serialized
+ * representation.
+ *
+ * In this function SVGA shader is emitted to an in-memory buffer that
+ * can be dynamically grown. Once we've finished and know how large
+ * it is, it will be copied to a hardware buffer for upload.
+ */
+static struct svga_shader_result *
+svga_tgsi_translate( const struct svga_shader *shader,
+ union svga_compile_key key,
+ unsigned unit )
+{
+ struct svga_shader_result *result = NULL;
+ struct svga_shader_emitter emit;
+ int ret = 0;
+
+ memset(&emit, 0, sizeof(emit));
+
+ emit.use_sm30 = shader->use_sm30;
+ emit.size = 1024;
+ emit.buf = MALLOC(emit.size);
+ if (emit.buf == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto fail;
+ }
+
+ emit.ptr = emit.buf;
+ emit.unit = unit;
+ emit.key = key;
+
+ tgsi_scan_shader( shader->tokens, &emit.info);
+
+ emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ if (unit == PIPE_SHADER_FRAGMENT)
+ emit.imm_start += key.fkey.num_unnormalized_coords;
+
+ if (unit == PIPE_SHADER_VERTEX) {
+ emit.imm_start += key.vkey.need_prescale ? 2 : 0;
+ emit.imm_start += key.vkey.num_zero_stride_vertex_elements;
+ }
+
+ emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+
+ emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ emit.in_main_func = TRUE;
+
+ if (!svga_shader_emit_header( &emit ))
+ goto fail;
+
+ if (!svga_shader_emit_instructions( &emit, shader->tokens ))
+ goto fail;
+
+ result = CALLOC_STRUCT(svga_shader_result);
+ if (result == NULL)
+ goto fail;
+
+ result->shader = shader;
+ result->tokens = (const unsigned *)emit.buf;
+ result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
+ memcpy(&result->key, &key, sizeof key);
+ result->id = UTIL_BITMASK_INVALID_INDEX;
+
+ if (SVGA_DEBUG & DEBUG_TGSI)
+ {
+ debug_printf( "#####################################\n" );
+ debug_printf( "Shader %u below\n", shader->id );
+ tgsi_dump( shader->tokens, 0 );
+ if (SVGA_DEBUG & DEBUG_TGSI) {
+ debug_printf( "Shader %u compiled below\n", shader->id );
+ svga_shader_dump( result->tokens,
+ result->nr_tokens ,
+ FALSE );
+ }
+ debug_printf( "#####################################\n" );
+ }
+
+ return result;
+
+fail:
+ FREE(result);
+ FREE(emit.buf);
+ return NULL;
+}
+
+
+
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+ const struct svga_fs_compile_key *fkey )
+{
+ union svga_compile_key key;
+ memcpy(&key.fkey, fkey, sizeof *fkey);
+
+ return svga_tgsi_translate( &fs->base,
+ key,
+ PIPE_SHADER_FRAGMENT );
+}
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *vs,
+ const struct svga_vs_compile_key *vkey )
+{
+ union svga_compile_key key;
+ memcpy(&key.vkey, vkey, sizeof *vkey);
+
+ return svga_tgsi_translate( &vs->base,
+ key,
+ PIPE_SHADER_VERTEX );
+}
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result )
+{
+ FREE((unsigned *)result->tokens);
+ FREE(result);
+}
+
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
new file mode 100644
index 00000000000..737a2213af5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -0,0 +1,136 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_H
+#define SVGA_TGSI_H
+
+#include "pipe/p_state.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_fragment_shader;
+struct svga_vertex_shader;
+struct svga_shader;
+struct tgsi_shader_info;
+struct tgsi_token;
+
+
+struct svga_vs_compile_key
+{
+ unsigned zero_stride_vertex_elements;
+ unsigned need_prescale:1;
+ unsigned allow_psiz:1;
+ unsigned num_zero_stride_vertex_elements:6;
+};
+
+struct svga_fs_compile_key
+{
+ unsigned light_twoside:1;
+ unsigned front_cw:1;
+ unsigned num_textures:8;
+ unsigned num_unnormalized_coords:8;
+ struct {
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned unnormalized:1;
+ unsigned width_height_idx:7;
+ unsigned texture_target:8;
+ } tex[PIPE_MAX_SAMPLERS];
+};
+
+union svga_compile_key {
+ struct svga_vs_compile_key vkey;
+ struct svga_fs_compile_key fkey;
+};
+
+struct svga_shader_result
+{
+ const struct svga_shader *shader;
+
+ /* Parameters used to generate this compilation result:
+ */
+ union svga_compile_key key;
+
+ /* Compiled shader tokens:
+ */
+ const unsigned *tokens;
+ unsigned nr_tokens;
+
+ /* SVGA Shader ID:
+ */
+ unsigned id;
+
+ /* Next compilation result:
+ */
+ struct svga_shader_result *next;
+};
+
+
+/* TGSI doesn't provide use with VS input semantics (they're actually
+ * pretty meaningless), so we just generate some plausible ones here.
+ * This is called both from within the TGSI translator and when
+ * building vdecls to ensure they match up.
+ *
+ * The real use of this information is matching vertex elements to
+ * fragment shader inputs in the case where vertex shader is disabled.
+ */
+static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+ unsigned *usage,
+ unsigned *usage_index )
+{
+ if (idx == 0) {
+ *usage = SVGA3D_DECLUSAGE_POSITION;
+ *usage_index = 0;
+ }
+ else {
+ *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ *usage_index = idx - 1;
+ }
+}
+
+
+
+static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+{
+ return sizeof *key;
+}
+
+static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+{
+ return (const char *)&key->tex[key->num_textures] - (const char *)key;
+}
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+ const struct svga_fs_compile_key *fkey );
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *fs,
+ const struct svga_vs_compile_key *vkey );
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
new file mode 100644
index 00000000000..23b3ace7f30
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
@@ -0,0 +1,280 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+
+
+static boolean ps20_input( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ struct src_register reg;
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ switch (semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ /* Special case:
+ */
+ reg = src_register( SVGA3DREG_MISCTYPE,
+ SVGA3DMISCREG_POSITION );
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ reg = src_register( SVGA3DREG_INPUT,
+ semantic.Index );
+ break;
+ case TGSI_SEMANTIC_FOG:
+ assert(semantic.Index == 0);
+ reg = src_register( SVGA3DREG_TEXTURE, 0 );
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ reg = src_register( SVGA3DREG_TEXTURE,
+ semantic.Index + 1 );
+ break;
+ default:
+ assert(0);
+ return TRUE;
+ }
+
+ emit->input_map[idx] = reg;
+
+ dcl.dst = dst( reg );
+
+ dcl.usage = 0;
+ dcl.index = 0;
+
+ dcl.values[0] |= 1<<31;
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean ps20_output( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3dShaderDestToken reg;
+
+ switch (semantic.Name) {
+ case TGSI_SEMANTIC_COLOR:
+ if (semantic.Index < PIPE_MAX_COLOR_BUFS) {
+ unsigned cbuf = semantic.Index;
+
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_col[cbuf] = emit->output_map[idx];
+ emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT,
+ semantic.Index );
+ }
+ else {
+ assert(0);
+ reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+ }
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_pos = emit->output_map[idx];
+ emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT,
+ semantic.Index );
+ break;
+ default:
+ assert(0);
+ reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+ break;
+ }
+
+ return TRUE;
+}
+
+
+static boolean vs20_input( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+ dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+ assert(dcl.dst.reserved0);
+
+ /* Mesa doesn't provide use with VS input semantics (they're
+ * actually pretty meaningless), so we just generate some plausible
+ * ones here. This has to match what we declare in the vdecl code
+ * in svga_pipe_vertex.c.
+ */
+ if (idx == 0) {
+ dcl.usage = SVGA3D_DECLUSAGE_POSITION;
+ dcl.index = 0;
+ }
+ else {
+ dcl.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ dcl.index = idx - 1;
+ }
+
+ dcl.values[0] |= 1<<31;
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean vs20_output( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ /* Don't emit dcl instruction for vs20 inputs
+ */
+
+ /* Just build the register map table:
+ */
+ switch (semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ assert(semantic.Index == 0);
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_pos = emit->output_map[idx];
+ emit->true_pos = dst_register( SVGA3DREG_RASTOUT,
+ SVGA3DRASTOUT_POSITION);
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ assert(semantic.Index == 0);
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_psiz = emit->output_map[idx];
+ emit->true_psiz = dst_register( SVGA3DREG_RASTOUT,
+ SVGA3DRASTOUT_PSIZE );
+ break;
+ case TGSI_SEMANTIC_FOG:
+ assert(semantic.Index == 0);
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 );
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ /* oD0 */
+ emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT,
+ semantic.Index );
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT,
+ semantic.Index + 1 );
+ break;
+ default:
+ assert(0);
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, 0 );
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean ps20_sampler( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+ dcl.type = svga_tgsi_sampler_type( emit, idx );
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+ const struct tgsi_full_declaration *decl )
+{
+ unsigned first = decl->Range.First;
+ unsigned last = decl->Range.Last;
+ unsigned semantic = 0;
+ unsigned semantic_idx = 0;
+ unsigned idx;
+
+ if (decl->Declaration.Semantic) {
+ semantic = decl->Semantic.Name;
+ semantic_idx = decl->Semantic.Index;
+ }
+
+ for( idx = first; idx <= last; idx++ ) {
+ boolean ok;
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_SAMPLER:
+ assert (emit->unit == PIPE_SHADER_FRAGMENT);
+ ok = ps20_sampler( emit, decl->Semantic, idx );
+ break;
+
+ case TGSI_FILE_INPUT:
+ if (emit->unit == PIPE_SHADER_VERTEX)
+ ok = vs20_input( emit, decl->Semantic, idx );
+ else
+ ok = ps20_input( emit, decl->Semantic, idx );
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ if (emit->unit == PIPE_SHADER_VERTEX)
+ ok = vs20_output( emit, decl->Semantic, idx );
+ else
+ ok = ps20_output( emit, decl->Semantic, idx );
+ break;
+
+ default:
+ /* don't need to declare other vars */
+ ok = TRUE;
+ }
+
+ if (!ok)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
new file mode 100644
index 00000000000..d1c7336dec4
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -0,0 +1,385 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic,
+ unsigned *usage,
+ unsigned *idx )
+{
+ switch (semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ *idx = semantic.Index;
+ *usage = SVGA3D_DECLUSAGE_POSITION;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+
+ *idx = semantic.Index;
+ *usage = SVGA3D_DECLUSAGE_COLOR;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ *idx = semantic.Index + 2; /* sharing with COLOR */
+ *usage = SVGA3D_DECLUSAGE_COLOR;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ *idx = 0;
+ assert(semantic.Index == 0);
+ *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ *idx = semantic.Index;
+ *usage = SVGA3D_DECLUSAGE_PSIZE;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ *idx = semantic.Index + 1; /* texcoord[0] is reserved for fog */
+ *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ break;
+ case TGSI_SEMANTIC_NORMAL:
+ *idx = semantic.Index;
+ *usage = SVGA3D_DECLUSAGE_NORMAL;
+ break;
+ default:
+ assert(0);
+ *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+ *idx = 0;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_decl( struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken reg,
+ unsigned usage,
+ unsigned index )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ dcl.dst = reg;
+ dcl.usage = usage;
+ dcl.index = index;
+ dcl.values[0] |= 1<<31;
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean emit_vface_decl( struct svga_shader_emitter *emit )
+{
+ if (!emit->emitted_vface) {
+ SVGA3dShaderDestToken reg =
+ dst_register( SVGA3DREG_MISCTYPE,
+ SVGA3DMISCREG_FACE );
+
+ if (!emit_decl( emit, reg, 0, 0 ))
+ return FALSE;
+
+ emit->emitted_vface = TRUE;
+ }
+ return TRUE;
+}
+
+static boolean ps30_input( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ unsigned usage, index;
+ SVGA3dShaderDestToken reg;
+
+ if (semantic.Name == TGSI_SEMANTIC_POSITION) {
+ emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE,
+ SVGA3DMISCREG_POSITION );
+
+ emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X,
+ TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y,
+ TGSI_SWIZZLE_Y );
+
+ reg = writemask( dst(emit->input_map[idx]),
+ TGSI_WRITEMASK_XY );
+
+ return emit_decl( emit, reg, 0, 0 );
+ }
+ else if (emit->key.fkey.light_twoside &&
+ (semantic.Name == TGSI_SEMANTIC_COLOR)) {
+
+ if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+ return FALSE;
+
+ emit->internal_color_idx[emit->internal_color_count] = idx;
+ emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count );
+ emit->ps30_input_count++;
+ emit->internal_color_count++;
+
+ reg = dst( emit->input_map[idx] );
+
+ if (!emit_decl( emit, reg, usage, index ))
+ return FALSE;
+
+ semantic.Name = TGSI_SEMANTIC_BCOLOR;
+ if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+ return FALSE;
+
+ reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+
+ if (!emit_decl( emit, reg, usage, index ))
+ return FALSE;
+
+ if (!emit_vface_decl( emit ))
+ return FALSE;
+
+ return TRUE;
+ }
+ else if (semantic.Name == TGSI_SEMANTIC_FACE) {
+ if (!emit_vface_decl( emit ))
+ return FALSE;
+ emit->emit_frontface = TRUE;
+ emit->internal_frontface_idx = idx;
+ return TRUE;
+ }
+ else {
+
+ if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+ return FALSE;
+
+ emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+ reg = dst( emit->input_map[idx] );
+
+ return emit_decl( emit, reg, usage, index );
+ }
+
+}
+
+
+/* PS output registers are the same as 2.0
+ */
+static boolean ps30_output( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3dShaderDestToken reg;
+
+ switch (semantic.Name) {
+ case TGSI_SEMANTIC_COLOR:
+ emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT,
+ semantic.Index );
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_pos = emit->output_map[idx];
+ emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT,
+ semantic.Index );
+ break;
+ default:
+ assert(0);
+ reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+ break;
+ }
+
+ return TRUE;
+}
+
+
+/* We still make up the input semantics the same as in 2.0
+ */
+static boolean vs30_input( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+ unsigned usage, index;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ if (emit->key.vkey.zero_stride_vertex_elements & (1 << idx)) {
+ unsigned i;
+ unsigned offset = 0;
+ unsigned start_idx = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ /* adjust for prescale constants */
+ start_idx += emit->key.vkey.need_prescale ? 2 : 0;
+ /* compute the offset from the start of zero stride constants */
+ for (i = 0; i < PIPE_MAX_ATTRIBS && i < idx; ++i) {
+ if (emit->key.vkey.zero_stride_vertex_elements & (1<<i))
+ ++offset;
+ }
+ emit->input_map[idx] = src_register( SVGA3DREG_CONST,
+ start_idx + offset );
+ } else {
+ emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+ dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+ assert(dcl.dst.reserved0);
+
+ svga_generate_vdecl_semantics( idx, &usage, &index );
+
+ dcl.usage = usage;
+ dcl.index = index;
+ dcl.values[0] |= 1<<31;
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+ }
+ return TRUE;
+}
+
+/* VS3.0 outputs have proper declarations and semantic info for
+ * matching against PS inputs.
+ */
+static boolean vs30_output( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+ unsigned usage, index;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+ return FALSE;
+
+ dcl.dst = dst_register( SVGA3DREG_OUTPUT, idx );
+ dcl.usage = usage;
+ dcl.index = index;
+ dcl.values[0] |= 1<<31;
+
+ if (semantic.Name == TGSI_SEMANTIC_POSITION) {
+ assert(idx == 0);
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_pos = emit->output_map[idx];
+ emit->true_pos = dcl.dst;
+ }
+ else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
+ emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+ emit->temp_psiz = emit->output_map[idx];
+
+ /* This has the effect of not declaring psiz (below) and not
+ * emitting the final MOV to true_psiz in the postamble.
+ */
+ if (!emit->key.vkey.allow_psiz)
+ return TRUE;
+
+ emit->true_psiz = dcl.dst;
+ }
+ else {
+ emit->output_map[idx] = dcl.dst;
+ }
+
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean ps30_sampler( struct svga_shader_emitter *emit,
+ struct tgsi_declaration_semantic semantic,
+ unsigned idx )
+{
+ SVGA3DOpDclArgs dcl;
+ SVGA3dShaderInstToken opcode;
+
+ opcode = inst_token( SVGA3DOP_DCL );
+ dcl.values[0] = 0;
+ dcl.values[1] = 0;
+
+ dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+ dcl.type = svga_tgsi_sampler_type( emit, idx );
+ dcl.values[0] |= 1<<31;
+
+ return (emit_instruction(emit, opcode) &&
+ svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+ const struct tgsi_full_declaration *decl )
+{
+ unsigned first = decl->Range.First;
+ unsigned last = decl->Range.Last;
+ unsigned semantic = 0;
+ unsigned semantic_idx = 0;
+ unsigned idx;
+
+ if (decl->Declaration.Semantic) {
+ semantic = decl->Semantic.Name;
+ semantic_idx = decl->Semantic.Index;
+ }
+
+ for( idx = first; idx <= last; idx++ ) {
+ boolean ok;
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_SAMPLER:
+ assert (emit->unit == PIPE_SHADER_FRAGMENT);
+ ok = ps30_sampler( emit, decl->Semantic, idx );
+ break;
+
+ case TGSI_FILE_INPUT:
+ if (emit->unit == PIPE_SHADER_VERTEX)
+ ok = vs30_input( emit, decl->Semantic, idx );
+ else
+ ok = ps30_input( emit, decl->Semantic, idx );
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ if (emit->unit == PIPE_SHADER_VERTEX)
+ ok = vs30_output( emit, decl->Semantic, idx );
+ else
+ ok = ps30_output( emit, decl->Semantic, idx );
+ break;
+
+ default:
+ /* don't need to declare other vars */
+ ok = TRUE;
+ }
+
+ if (!ok)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
new file mode 100644
index 00000000000..2557824293e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -0,0 +1,345 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_EMIT_H
+#define SVGA_TGSI_EMIT_H
+
+#include "tgsi/tgsi_scan.h"
+#include "svga_hw_reg.h"
+#include "svga_tgsi.h"
+#include "svga3d_shaderdefs.h"
+
+struct src_register
+{
+ SVGA3dShaderSrcToken base;
+ SVGA3dShaderSrcToken indirect;
+};
+
+
+struct svga_arl_consts {
+ int number;
+ int idx;
+ int swizzle;
+ int arl_num;
+};
+
+/* Internal functions:
+ */
+
+struct svga_shader_emitter
+{
+ boolean use_sm30;
+
+ unsigned size;
+ char *buf;
+ char *ptr;
+
+ union svga_compile_key key;
+ struct tgsi_shader_info info;
+ int unit;
+
+ int imm_start;
+
+ int nr_hw_const;
+ int nr_hw_temp;
+
+ int insn_offset;
+
+ int internal_temp_count;
+ int internal_imm_count;
+
+ int internal_color_idx[2]; /* diffuse, specular */
+ int internal_color_count;
+
+ boolean emitted_vface;
+ boolean emit_frontface;
+ int internal_frontface_idx;
+
+ int ps30_input_count;
+
+ boolean in_main_func;
+
+ boolean created_zero_immediate;
+ int zero_immediate_idx;
+
+ boolean created_loop_const;
+ int loop_const_idx;
+
+ boolean created_sincos_consts;
+ int sincos_consts_idx;
+
+ unsigned label[32];
+ unsigned nr_labels;
+
+ struct src_register input_map[PIPE_MAX_ATTRIBS];
+ SVGA3dShaderDestToken output_map[PIPE_MAX_ATTRIBS];
+
+ struct src_register imm_0055;
+ SVGA3dShaderDestToken temp_pos;
+ SVGA3dShaderDestToken true_pos;
+
+ SVGA3dShaderDestToken temp_col[PIPE_MAX_COLOR_BUFS];
+ SVGA3dShaderDestToken true_col[PIPE_MAX_COLOR_BUFS];
+
+ SVGA3dShaderDestToken temp_psiz;
+ SVGA3dShaderDestToken true_psiz;
+
+ struct svga_arl_consts arl_consts[12];
+ int num_arl_consts;
+ int current_arl;
+};
+
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+ unsigned dword );
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+ const unsigned *dwords,
+ unsigned nr );
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+ unsigned opcode );
+
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+ const struct tgsi_token *tokens );
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+ const struct tgsi_full_declaration *decl );
+
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+ const struct tgsi_full_declaration *decl );
+
+
+static INLINE boolean emit_dst( struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dest )
+{
+ assert(dest.reserved0);
+ return svga_shader_emit_dword( emit, dest.value );
+}
+
+static INLINE boolean emit_src( struct svga_shader_emitter *emit,
+ const struct src_register src )
+{
+ if (src.base.relAddr) {
+ assert(src.base.reserved0);
+ assert(src.indirect.reserved0);
+ return (svga_shader_emit_dword( emit, src.base.value ) &&
+ svga_shader_emit_dword( emit, src.indirect.value ));
+ }
+ else {
+ assert(src.base.reserved0);
+ return svga_shader_emit_dword( emit, src.base.value );
+ }
+}
+
+
+static INLINE boolean emit_instruction( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken opcode )
+{
+ return svga_shader_emit_opcode( emit, opcode.value );
+}
+
+
+static INLINE boolean emit_op1( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0 )
+{
+ return (emit_instruction( emit, inst ) &&
+ emit_dst( emit, dest ) &&
+ emit_src( emit, src0 ));
+}
+
+static INLINE boolean emit_op2( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1 )
+{
+ return (emit_instruction( emit, inst ) &&
+ emit_dst( emit, dest ) &&
+ emit_src( emit, src0 ) &&
+ emit_src( emit, src1 ));
+}
+
+static INLINE boolean emit_op3( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2 )
+{
+ return (emit_instruction( emit, inst ) &&
+ emit_dst( emit, dest ) &&
+ emit_src( emit, src0 ) &&
+ emit_src( emit, src1 ) &&
+ emit_src( emit, src2 ));
+}
+
+
+#define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6))
+#define SWIZZLE_XYZW \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W)
+#define SWIZZLE_XXXX \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X)
+#define SWIZZLE_YYYY \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y)
+#define SWIZZLE_ZZZZ \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z)
+#define SWIZZLE_WWWW \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W)
+
+
+
+static INLINE SVGA3dShaderInstToken
+inst_token( unsigned opcode )
+{
+ SVGA3dShaderInstToken inst;
+
+ inst.value = 0;
+ inst.op = opcode;
+
+ return inst;
+}
+
+static INLINE SVGA3dShaderDestToken
+dst_register( unsigned file,
+ int number )
+{
+ SVGA3dShaderDestToken dest;
+
+ dest.value = 0;
+ dest.num = number;
+ dest.type_upper = file >> 3;
+ dest.relAddr = 0;
+ dest.reserved1 = 0;
+ dest.mask = 0xf;
+ dest.dstMod = 0;
+ dest.shfScale = 0;
+ dest.type_lower = file & 0x7;
+ dest.reserved0 = 1; /* is_reg */
+
+ return dest;
+}
+
+static INLINE SVGA3dShaderDestToken
+writemask( SVGA3dShaderDestToken dest,
+ unsigned mask )
+{
+ dest.mask &= mask;
+ return dest;
+}
+
+
+static INLINE SVGA3dShaderSrcToken
+src_token( unsigned file, int number )
+{
+ SVGA3dShaderSrcToken src;
+
+ src.value = 0;
+ src.num = number;
+ src.type_upper = file >> 3;
+ src.relAddr = 0;
+ src.reserved1 = 0;
+ src.swizzle = SWIZZLE_XYZW;
+ src.srcMod = 0;
+ src.type_lower = file & 0x7;
+ src.reserved0 = 1; /* is_reg */
+
+ return src;
+}
+
+
+static INLINE struct src_register
+absolute( struct src_register src )
+{
+ src.base.srcMod = SVGA3DSRCMOD_ABS;
+
+ return src;
+}
+
+
+static INLINE struct src_register
+negate( struct src_register src )
+{
+ switch (src.base.srcMod) {
+ case SVGA3DSRCMOD_ABS:
+ src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+ break;
+ case SVGA3DSRCMOD_ABSNEG:
+ src.base.srcMod = SVGA3DSRCMOD_ABS;
+ break;
+ case SVGA3DSRCMOD_NEG:
+ src.base.srcMod = SVGA3DSRCMOD_NONE;
+ break;
+ case SVGA3DSRCMOD_NONE:
+ src.base.srcMod = SVGA3DSRCMOD_NEG;
+ break;
+ }
+ return src;
+}
+
+
+static INLINE struct src_register
+src_register( unsigned file, int number )
+{
+ struct src_register src;
+
+ src.base = src_token( file, number );
+ src.indirect.value = 0;
+
+ return src;
+}
+
+static INLINE SVGA3dShaderDestToken dst( struct src_register src )
+{
+ return dst_register( SVGA3dShaderGetRegType( src.base.value ),
+ src.base.num );
+}
+
+static INLINE struct src_register src( SVGA3dShaderDestToken dst )
+{
+ return src_register( SVGA3dShaderGetRegType( dst.value ),
+ dst.num );
+}
+
+static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit,
+ int idx )
+{
+ switch (emit->key.fkey.tex[idx].texture_target) {
+ case PIPE_TEXTURE_1D:
+ return SVGA3DSAMP_2D;
+ case PIPE_TEXTURE_2D:
+ return SVGA3DSAMP_2D;
+ case PIPE_TEXTURE_3D:
+ return SVGA3DSAMP_VOLUME;
+ case PIPE_TEXTURE_CUBE:
+ return SVGA3DSAMP_CUBE;
+ }
+
+ return SVGA3DSAMP_UNKNOWN;
+}
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
new file mode 100644
index 00000000000..dc5eb8fc606
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -0,0 +1,2716 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
+
+
+
+
+static unsigned
+translate_opcode(
+ uint opcode )
+{
+ switch (opcode) {
+ case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
+ case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
+ case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
+ case TGSI_OPCODE_DDX: return SVGA3DOP_DSX;
+ case TGSI_OPCODE_DDY: return SVGA3DOP_DSY;
+ case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
+ case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
+ case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
+ case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP;
+ case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
+ case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP;
+ case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
+ case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
+ case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
+ case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
+ case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
+ case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
+ case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
+ case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
+ default:
+ debug_printf("Unkown opcode %u\n", opcode);
+ assert( 0 );
+ return SVGA3DOP_LAST_INST;
+ }
+}
+
+
+static unsigned translate_file( unsigned file )
+{
+ switch (file) {
+ case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
+ case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
+ case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
+ case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
+ case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
+ case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
+ case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
+ default:
+ assert( 0 );
+ return SVGA3DREG_TEMP;
+ }
+}
+
+
+
+
+
+
+static SVGA3dShaderDestToken
+translate_dst_register( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn,
+ unsigned idx )
+{
+ const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
+ SVGA3dShaderDestToken dest;
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ /* Output registers encode semantic information in their name.
+ * Need to lookup a table built at decl time:
+ */
+ dest = emit->output_map[reg->Register.Index];
+ break;
+
+ default:
+ dest = dst_register( translate_file( reg->Register.File ),
+ reg->Register.Index );
+ break;
+ }
+
+ dest.mask = reg->Register.WriteMask;
+
+ if (insn->Instruction.Saturate)
+ dest.dstMod = SVGA3DDSTMOD_SATURATE;
+
+ return dest;
+}
+
+
+static struct src_register
+swizzle( struct src_register src,
+ int x,
+ int y,
+ int z,
+ int w )
+{
+ x = (src.base.swizzle >> (x * 2)) & 0x3;
+ y = (src.base.swizzle >> (y * 2)) & 0x3;
+ z = (src.base.swizzle >> (z * 2)) & 0x3;
+ w = (src.base.swizzle >> (w * 2)) & 0x3;
+
+ src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
+
+ return src;
+}
+
+static struct src_register
+scalar( struct src_register src,
+ int comp )
+{
+ return swizzle( src, comp, comp, comp, comp );
+}
+
+static INLINE boolean
+svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
+{
+ int i;
+
+ for (i = 0; i < emit->num_arl_consts; ++i) {
+ if (emit->arl_consts[i].arl_num == emit->current_arl)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static INLINE int
+svga_arl_adjustment( const struct svga_shader_emitter *emit )
+{
+ int i;
+
+ for (i = 0; i < emit->num_arl_consts; ++i) {
+ if (emit->arl_consts[i].arl_num == emit->current_arl)
+ return emit->arl_consts[i].number;
+ }
+ return 0;
+}
+
+static struct src_register
+translate_src_register( const struct svga_shader_emitter *emit,
+ const struct tgsi_full_src_register *reg )
+{
+ struct src_register src;
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_INPUT:
+ /* Input registers are referred to by their semantic name rather
+ * than by index. Use the mapping build up from the decls:
+ */
+ src = emit->input_map[reg->Register.Index];
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ /* Immediates are appended after TGSI constants in the D3D
+ * constant buffer.
+ */
+ src = src_register( translate_file( reg->Register.File ),
+ reg->Register.Index +
+ emit->imm_start );
+ break;
+
+ default:
+ src = src_register( translate_file( reg->Register.File ),
+ reg->Register.Index );
+
+ break;
+ }
+
+ /* Indirect addressing (for coninstant buffer lookups only)
+ */
+ if (reg->Register.Indirect)
+ {
+ /* we shift the offset towards the minimum */
+ if (svga_arl_needs_adjustment( emit )) {
+ src.base.num -= svga_arl_adjustment( emit );
+ }
+ src.base.relAddr = 1;
+
+ /* Not really sure what should go in the second token:
+ */
+ src.indirect = src_token( SVGA3DREG_ADDR,
+ reg->Indirect.Index );
+
+ src.indirect.swizzle = SWIZZLE_XXXX;
+ }
+
+ src = swizzle( src,
+ reg->Register.SwizzleX,
+ reg->Register.SwizzleY,
+ reg->Register.SwizzleZ,
+ reg->Register.SwizzleW );
+
+ /* src.mod isn't a bitfield, unfortunately:
+ * See tgsi_util_get_full_src_register_sign_mode for implementation details.
+ */
+ if (reg->Register.Absolute) {
+ if (reg->Register.Negate)
+ src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+ else
+ src.base.srcMod = SVGA3DSRCMOD_ABS;
+ }
+ else {
+ if (reg->Register.Negate)
+ src.base.srcMod = SVGA3DSRCMOD_NEG;
+ else
+ src.base.srcMod = SVGA3DSRCMOD_NONE;
+ }
+
+ return src;
+}
+
+
+/*
+ * Get a temporary register, return -1 if none available
+ */
+static INLINE SVGA3dShaderDestToken
+get_temp( struct svga_shader_emitter *emit )
+{
+ int i = emit->nr_hw_temp + emit->internal_temp_count++;
+
+ return dst_register( SVGA3DREG_TEMP, i );
+}
+
+/* Release a single temp. Currently only effective if it was the last
+ * allocated temp, otherwise release will be delayed until the next
+ * call to reset_temp_regs().
+ */
+static INLINE void
+release_temp( struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken temp )
+{
+ if (temp.num == emit->internal_temp_count - 1)
+ emit->internal_temp_count--;
+}
+
+static void reset_temp_regs( struct svga_shader_emitter *emit )
+{
+ emit->internal_temp_count = 0;
+}
+
+
+static boolean submit_op0( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest )
+{
+ return (emit_instruction( emit, inst ) &&
+ emit_dst( emit, dest ));
+}
+
+static boolean submit_op1( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0 )
+{
+ return emit_op1( emit, inst, dest, src0 );
+}
+
+
+/* SVGA shaders may not refer to >1 constant register in a single
+ * instruction. This function checks for that usage and inserts a
+ * move to temporary if detected.
+ *
+ * The same applies to input registers -- at most a single input
+ * register may be read by any instruction.
+ */
+static boolean submit_op2( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1 )
+{
+ SVGA3dShaderDestToken temp;
+ SVGA3dShaderRegType type0, type1;
+ boolean need_temp = FALSE;
+
+ temp.value = 0;
+ type0 = SVGA3dShaderGetRegType( src0.base.value );
+ type1 = SVGA3dShaderGetRegType( src1.base.value );
+
+ if (type0 == SVGA3DREG_CONST &&
+ type1 == SVGA3DREG_CONST &&
+ src0.base.num != src1.base.num)
+ need_temp = TRUE;
+
+ if (type0 == SVGA3DREG_INPUT &&
+ type1 == SVGA3DREG_INPUT &&
+ src0.base.num != src1.base.num)
+ need_temp = TRUE;
+
+ if (need_temp)
+ {
+ temp = get_temp( emit );
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+ return FALSE;
+
+ src0 = src( temp );
+ }
+
+ if (!emit_op2( emit, inst, dest, src0, src1 ))
+ return FALSE;
+
+ if (need_temp)
+ release_temp( emit, temp );
+
+ return TRUE;
+}
+
+
+/* SVGA shaders may not refer to >1 constant register in a single
+ * instruction. This function checks for that usage and inserts a
+ * move to temporary if detected.
+ */
+static boolean submit_op3( struct svga_shader_emitter *emit,
+ SVGA3dShaderInstToken inst,
+ SVGA3dShaderDestToken dest,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register src2 )
+{
+ SVGA3dShaderDestToken temp0;
+ SVGA3dShaderDestToken temp1;
+ boolean need_temp0 = FALSE;
+ boolean need_temp1 = FALSE;
+ SVGA3dShaderRegType type0, type1, type2;
+
+ temp0.value = 0;
+ temp1.value = 0;
+ type0 = SVGA3dShaderGetRegType( src0.base.value );
+ type1 = SVGA3dShaderGetRegType( src1.base.value );
+ type2 = SVGA3dShaderGetRegType( src2.base.value );
+
+ if (inst.op != SVGA3DOP_SINCOS) {
+ if (type0 == SVGA3DREG_CONST &&
+ ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
+ (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
+ need_temp0 = TRUE;
+
+ if (type1 == SVGA3DREG_CONST &&
+ (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
+ need_temp1 = TRUE;
+ }
+
+ if (type0 == SVGA3DREG_INPUT &&
+ ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
+ (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
+ need_temp0 = TRUE;
+
+ if (type1 == SVGA3DREG_INPUT &&
+ (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
+ need_temp1 = TRUE;
+
+ if (need_temp0)
+ {
+ temp0 = get_temp( emit );
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+ return FALSE;
+
+ src0 = src( temp0 );
+ }
+
+ if (need_temp1)
+ {
+ temp1 = get_temp( emit );
+
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
+ return FALSE;
+
+ src1 = src( temp1 );
+ }
+
+ if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
+ return FALSE;
+
+ if (need_temp1)
+ release_temp( emit, temp1 );
+ if (need_temp0)
+ release_temp( emit, temp0 );
+ return TRUE;
+}
+
+
+static boolean emit_def_const( struct svga_shader_emitter *emit,
+ SVGA3dShaderConstType type,
+ unsigned idx,
+ float a,
+ float b,
+ float c,
+ float d )
+{
+ SVGA3DOpDefArgs def;
+ SVGA3dShaderInstToken opcode;
+
+ switch (type) {
+ case SVGA3D_CONST_TYPE_FLOAT:
+ opcode = inst_token( SVGA3DOP_DEF );
+ def.dst = dst_register( SVGA3DREG_CONST, idx );
+ def.constValues[0] = a;
+ def.constValues[1] = b;
+ def.constValues[2] = c;
+ def.constValues[3] = d;
+ break;
+ case SVGA3D_CONST_TYPE_INT:
+ opcode = inst_token( SVGA3DOP_DEFI );
+ def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
+ def.constIValues[0] = (int)a;
+ def.constIValues[1] = (int)b;
+ def.constIValues[2] = (int)c;
+ def.constIValues[3] = (int)d;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (!emit_instruction(emit, opcode) ||
+ !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
+ return FALSE;
+
+ return TRUE;
+}
+
+static INLINE boolean
+create_zero_immediate( struct svga_shader_emitter *emit )
+{
+ unsigned idx = emit->nr_hw_const++;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+ idx, 0, 0, 0, 1 ))
+ return FALSE;
+
+ emit->zero_immediate_idx = idx;
+ emit->created_zero_immediate = TRUE;
+
+ return TRUE;
+}
+
+static INLINE boolean
+create_loop_const( struct svga_shader_emitter *emit )
+{
+ unsigned idx = emit->nr_hw_const++;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
+ 255, /* iteration count */
+ 0, /* initial value */
+ 1, /* step size */
+ 0 /* not used, must be 0 */))
+ return FALSE;
+
+ emit->loop_const_idx = idx;
+ emit->created_loop_const = TRUE;
+
+ return TRUE;
+}
+
+static INLINE boolean
+create_sincos_consts( struct svga_shader_emitter *emit )
+{
+ unsigned idx = emit->nr_hw_const++;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+ -1.5500992e-006f,
+ -2.1701389e-005f,
+ 0.0026041667f,
+ 0.00026041668f ))
+ return FALSE;
+
+ emit->sincos_consts_idx = idx;
+ idx = emit->nr_hw_const++;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+ -0.020833334f,
+ -0.12500000f,
+ 1.0f,
+ 0.50000000f ))
+ return FALSE;
+
+ emit->created_sincos_consts = TRUE;
+
+ return TRUE;
+}
+
+static INLINE boolean
+create_arl_consts( struct svga_shader_emitter *emit )
+{
+ int i;
+
+ for (i = 0; i < emit->num_arl_consts; i += 4) {
+ int j;
+ unsigned idx = emit->nr_hw_const++;
+ float vals[4];
+ for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
+ vals[j] = emit->arl_consts[i + j].number;
+ emit->arl_consts[i + j].idx = idx;
+ switch (j) {
+ case 0:
+ emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
+ break;
+ case 1:
+ emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
+ break;
+ case 2:
+ emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
+ break;
+ case 3:
+ emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
+ break;
+ }
+ }
+ while (j < 4)
+ vals[j++] = 0;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+ vals[0], vals[1],
+ vals[2], vals[3]))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static INLINE struct src_register
+get_vface( struct svga_shader_emitter *emit )
+{
+ assert(emit->emitted_vface);
+ return src_register(SVGA3DREG_MISCTYPE,
+ SVGA3DMISCREG_FACE);
+}
+
+/* returns {0, 0, 0, 1} immediate */
+static INLINE struct src_register
+get_zero_immediate( struct svga_shader_emitter *emit )
+{
+ assert(emit->created_zero_immediate);
+ assert(emit->zero_immediate_idx >= 0);
+ return src_register( SVGA3DREG_CONST,
+ emit->zero_immediate_idx );
+}
+
+/* returns the loop const */
+static INLINE struct src_register
+get_loop_const( struct svga_shader_emitter *emit )
+{
+ assert(emit->created_loop_const);
+ assert(emit->loop_const_idx >= 0);
+ return src_register( SVGA3DREG_CONSTINT,
+ emit->loop_const_idx );
+}
+
+/* returns a sincos const */
+static INLINE struct src_register
+get_sincos_const( struct svga_shader_emitter *emit,
+ unsigned index )
+{
+ assert(emit->created_sincos_consts);
+ assert(emit->sincos_consts_idx >= 0);
+ assert(index == 0 || index == 1);
+ return src_register( SVGA3DREG_CONST,
+ emit->sincos_consts_idx + index );
+}
+
+static INLINE struct src_register
+get_fake_arl_const( struct svga_shader_emitter *emit )
+{
+ struct src_register reg;
+ int idx = 0, swizzle = 0, i;
+
+ for (i = 0; i < emit->num_arl_consts; ++ i) {
+ if (emit->arl_consts[i].arl_num == emit->current_arl) {
+ idx = emit->arl_consts[i].idx;
+ swizzle = emit->arl_consts[i].swizzle;
+ }
+ }
+
+ reg = src_register( SVGA3DREG_CONST, idx );
+ return scalar(reg, swizzle);
+}
+
+static INLINE struct src_register
+get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
+{
+ int idx;
+ struct src_register reg;
+
+ /* the width/height indexes start right after constants */
+ idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+ emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+ reg = src_register( SVGA3DREG_CONST, idx );
+ return reg;
+}
+
+static boolean emit_fake_arl(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register src1 = get_fake_arl_const( emit );
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
+ return FALSE;
+
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
+ src1))
+ return FALSE;
+
+ /* replicate the original swizzle */
+ src1 = src(tmp);
+ src1.base.swizzle = src0.base.swizzle;
+
+ return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
+ dst, src1 );
+}
+
+static boolean emit_if(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct src_register src = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register zero = get_zero_immediate( emit );
+ SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
+
+ if_token.control = SVGA3DOPCOMPC_NE;
+ zero = scalar(zero, TGSI_SWIZZLE_X);
+
+ return (emit_instruction( emit, if_token ) &&
+ emit_src( emit, src ) &&
+ emit_src( emit, zero ) );
+}
+
+static boolean emit_endif(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ return (emit_instruction( emit,
+ inst_token( SVGA3DOP_ENDIF )));
+}
+
+static boolean emit_else(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ return (emit_instruction( emit,
+ inst_token( SVGA3DOP_ELSE )));
+}
+
+/* Translate the following TGSI FLR instruction.
+ * FLR DST, SRC
+ * To the following SVGA3D instruction sequence.
+ * FRC TMP, SRC
+ * SUB DST, SRC, TMP
+ */
+static boolean emit_floor(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* FRC TMP, SRC */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
+ return FALSE;
+
+ /* SUB DST, SRC, TMP */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
+ negate( src( temp ) ) ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/* Translate the following TGSI CMP instruction.
+ * CMP DST, SRC0, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ * CMP DST, SRC0, SRC2, SRC1
+ */
+static boolean emit_cmp(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ const struct src_register src2 = translate_src_register(
+ emit, &insn->Src[2] );
+
+ /* CMP DST, SRC0, SRC2, SRC1 */
+ return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
+}
+
+
+
+/* Translate the following TGSI DIV instruction.
+ * DIV DST.xy, SRC0, SRC1
+ * To the following SVGA3D instruction sequence.
+ * RCP TMP.x, SRC1.xxxx
+ * RCP TMP.y, SRC1.yyyy
+ * MUL DST.xy, SRC0, TMP
+ */
+static boolean emit_div(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+ int i;
+
+ /* For each enabled element, perform a RCP instruction. Note that
+ * RCP is scalar in SVGA3D:
+ */
+ for (i = 0; i < 4; i++) {
+ unsigned channel = 1 << i;
+ if (dst.mask & channel) {
+ /* RCP TMP.?, SRC1.???? */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+ writemask(temp, channel),
+ scalar(src1, i) ))
+ return FALSE;
+ }
+ }
+
+ /* Then multiply them out with a single mul:
+ *
+ * MUL DST, SRC0, TMP
+ */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
+ src( temp ) ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Translate the following TGSI DP2 instruction.
+ * DP2 DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ * MUL TMP, SRC1, SRC2
+ * ADD DST, TMP.xxxx, TMP.yyyy
+ */
+static boolean emit_dp2(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+ struct src_register temp_src0, temp_src1;
+
+ /* MUL TMP, SRC1, SRC2 */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
+ return FALSE;
+
+ temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
+ temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
+
+ /* ADD DST, TMP.xxxx, TMP.yyyy */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+ temp_src0, temp_src1 ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/* Translate the following TGSI DPH instruction.
+ * DPH DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ * DP3 TMP, SRC1, SRC2
+ * ADD DST, TMP, SRC2.wwww
+ */
+static boolean emit_dph(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* DP3 TMP, SRC1, SRC2 */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
+ return FALSE;
+
+ src1 = scalar(src1, TGSI_SWIZZLE_W);
+
+ /* ADD DST, TMP, SRC2.wwww */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+ src( temp ), src1 ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Translate the following TGSI DST instruction.
+ * NRM DST, SRC
+ * To the following SVGA3D instruction sequence.
+ * DP3 TMP, SRC, SRC
+ * RSQ TMP, TMP
+ * MUL DST, SRC, TMP
+ */
+static boolean emit_nrm(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* DP3 TMP, SRC, SRC */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
+ return FALSE;
+
+ /* RSQ TMP, TMP */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
+ return FALSE;
+
+ /* MUL DST, SRC, TMP */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
+ src0, src( temp )))
+ return FALSE;
+
+ return TRUE;
+
+}
+
+static boolean do_emit_sincos(struct svga_shader_emitter *emit,
+ SVGA3dShaderDestToken dst,
+ struct src_register src0)
+{
+ src0 = scalar(src0, TGSI_SWIZZLE_X);
+
+ if (emit->use_sm30) {
+ return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
+ dst, src0 );
+ } else {
+ struct src_register const1 = get_sincos_const( emit, 0 );
+ struct src_register const2 = get_sincos_const( emit, 1 );
+
+ return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
+ dst, src0, const1, const2 );
+ }
+}
+
+static boolean emit_sincos(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* SCS TMP SRC */
+ if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
+ return FALSE;
+
+ /* MOV DST TMP */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.yyyy
+ */
+static boolean emit_sin(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* SCS TMP SRC */
+ if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
+ return FALSE;
+
+ src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
+
+ /* MOV DST TMP.yyyy */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.xxxx
+ */
+static boolean emit_cos(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ /* SCS TMP SRC */
+ if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
+ return FALSE;
+
+ src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
+
+ /* MOV DST TMP.xxxx */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/*
+ * ADD DST SRC0, negate(SRC0)
+ */
+static boolean emit_sub(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+
+ src1 = negate(src1);
+
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+ src0, src1 ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+static boolean emit_kil(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst;
+ const struct tgsi_full_src_register *reg = &insn->Src[0];
+ struct src_register src0;
+
+ inst = inst_token( SVGA3DOP_TEXKILL );
+ src0 = translate_src_register( emit, reg );
+
+ if (reg->Register.Absolute ||
+ reg->Register.Negate ||
+ reg->Register.Indirect ||
+ reg->Register.SwizzleX != 0 ||
+ reg->Register.SwizzleY != 1 ||
+ reg->Register.SwizzleZ != 2 ||
+ reg->Register.File != TGSI_FILE_TEMPORARY)
+ {
+ SVGA3dShaderDestToken temp = get_temp( emit );
+
+ submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
+ src0 = src( temp );
+ }
+
+ return submit_op0( emit, inst, dst(src0) );
+}
+
+
+/* mesa state tracker always emits kilp as an unconditional
+ * kil */
+static boolean emit_kilp(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken temp;
+ struct src_register one = get_zero_immediate( emit );
+
+ inst = inst_token( SVGA3DOP_TEXKILL );
+ one = scalar( one, TGSI_SWIZZLE_W );
+
+ /* texkill doesn't allow negation on the operand so lets move
+ * negation of {1} to a temp register */
+ temp = get_temp( emit );
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
+ negate( one ) ))
+ return FALSE;
+
+ return submit_op0( emit, inst, temp );
+}
+
+/* Implement conditionals by initializing destination reg to 'fail',
+ * then set predicate reg with UFOP_SETP, then move 'pass' to dest
+ * based on predicate reg.
+ *
+ * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
+ * MOV dst, fail
+ * MOV dst, pass, p0
+ */
+static boolean
+emit_conditional(struct svga_shader_emitter *emit,
+ unsigned compare_func,
+ SVGA3dShaderDestToken dst,
+ struct src_register src0,
+ struct src_register src1,
+ struct src_register pass,
+ struct src_register fail)
+{
+ SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+ SVGA3dShaderInstToken setp_token, mov_token;
+ setp_token = inst_token( SVGA3DOP_SETP );
+
+ switch (compare_func) {
+ case PIPE_FUNC_NEVER:
+ return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ dst, fail );
+ break;
+ case PIPE_FUNC_LESS:
+ setp_token.control = SVGA3DOPCOMP_LT;
+ break;
+ case PIPE_FUNC_EQUAL:
+ setp_token.control = SVGA3DOPCOMP_EQ;
+ break;
+ case PIPE_FUNC_LEQUAL:
+ setp_token.control = SVGA3DOPCOMP_LE;
+ break;
+ case PIPE_FUNC_GREATER:
+ setp_token.control = SVGA3DOPCOMP_GT;
+ break;
+ case PIPE_FUNC_NOTEQUAL:
+ setp_token.control = SVGA3DOPCOMPC_NE;
+ break;
+ case PIPE_FUNC_GEQUAL:
+ setp_token.control = SVGA3DOPCOMP_GE;
+ break;
+ case PIPE_FUNC_ALWAYS:
+ return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ dst, pass );
+ break;
+ }
+
+ /* SETP src0, COMPOP, src1 */
+ if (!submit_op2( emit, setp_token, pred_reg,
+ src0, src1 ))
+ return FALSE;
+
+ mov_token = inst_token( SVGA3DOP_MOV );
+
+ /* MOV dst, fail */
+ if (!submit_op1( emit, mov_token, dst,
+ fail ))
+ return FALSE;
+
+ /* MOV dst, pass (predicated)
+ *
+ * Note that the predicate reg (and possible modifiers) is passed
+ * as the first source argument.
+ */
+ mov_token.predicated = 1;
+ if (!submit_op2( emit, mov_token, dst,
+ src( pred_reg ), pass ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+static boolean
+emit_select(struct svga_shader_emitter *emit,
+ unsigned compare_func,
+ SVGA3dShaderDestToken dst,
+ struct src_register src0,
+ struct src_register src1 )
+{
+ /* There are some SVGA instructions which implement some selects
+ * directly, but they are only available in the vertex shader.
+ */
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ switch (compare_func) {
+ case PIPE_FUNC_GEQUAL:
+ return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
+ case PIPE_FUNC_LEQUAL:
+ return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
+ case PIPE_FUNC_GREATER:
+ return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
+ case PIPE_FUNC_LESS:
+ return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
+ default:
+ break;
+ }
+ }
+
+
+ /* Otherwise, need to use the setp approach:
+ */
+ {
+ struct src_register one, zero;
+ /* zero immediate is 0,0,0,1 */
+ zero = get_zero_immediate( emit );
+ one = scalar( zero, TGSI_SWIZZLE_W );
+ zero = scalar( zero, TGSI_SWIZZLE_X );
+
+ return emit_conditional(
+ emit,
+ compare_func,
+ dst,
+ src0,
+ src1,
+ one, zero);
+ }
+}
+
+
+static boolean emit_select_op(struct svga_shader_emitter *emit,
+ unsigned compare,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+
+ return emit_select( emit, compare, dst, src0, src1 );
+}
+
+
+/* Translate texture instructions to SVGA3D representation.
+ */
+static boolean emit_tex2(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn,
+ SVGA3dShaderDestToken dst )
+{
+ SVGA3dShaderInstToken inst;
+ struct src_register src0;
+ struct src_register src1;
+
+ inst.value = 0;
+ inst.op = SVGA3DOP_TEX;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ break;
+ case TGSI_OPCODE_TXP:
+ inst.control = SVGA3DOPCONT_PROJECT;
+ break;
+ case TGSI_OPCODE_TXB:
+ inst.control = SVGA3DOPCONT_BIAS;
+ break;
+ default:
+ assert(0);
+ return FALSE;
+ }
+
+ src0 = translate_src_register( emit, &insn->Src[0] );
+ src1 = translate_src_register( emit, &insn->Src[1] );
+
+ if (emit->key.fkey.tex[src1.base.num].unnormalized) {
+ struct src_register wh = get_tex_dimensions( emit, src1.base.num );
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ /* MUL tmp, SRC0, WH */
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+ tmp, src0, wh ))
+ return FALSE;
+ src0 = src( tmp );
+ }
+
+ return submit_op2( emit, inst, dst, src0, src1 );
+}
+
+
+
+
+/* Translate texture instructions to SVGA3D representation.
+ */
+static boolean emit_tex3(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn,
+ SVGA3dShaderDestToken dst )
+{
+ SVGA3dShaderInstToken inst;
+ struct src_register src0;
+ struct src_register src1;
+ struct src_register src2;
+
+ inst.value = 0;
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_TXD:
+ inst.op = SVGA3DOP_TEXLDD;
+ break;
+ case TGSI_OPCODE_TXL:
+ inst.op = SVGA3DOP_TEXLDL;
+ break;
+ }
+
+ src0 = translate_src_register( emit, &insn->Src[0] );
+ src1 = translate_src_register( emit, &insn->Src[1] );
+ src2 = translate_src_register( emit, &insn->Src[2] );
+
+ return submit_op3( emit, inst, dst, src0, src1, src2 );
+}
+
+
+static boolean emit_tex(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderDestToken dst =
+ translate_dst_register( emit, insn, 0 );
+ struct src_register src0 =
+ translate_src_register( emit, &insn->Src[0] );
+ struct src_register src1 =
+ translate_src_register( emit, &insn->Src[1] );
+
+ SVGA3dShaderDestToken tex_result;
+
+ /* check for shadow samplers */
+ boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
+ PIPE_TEX_COMPARE_R_TO_TEXTURE);
+
+
+ /* If doing compare processing, need to put this value into a
+ * temporary so it can be used as a source later on.
+ */
+ if (compare ||
+ (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
+ tex_result = get_temp( emit );
+ }
+ else {
+ tex_result = dst;
+ }
+
+ switch(insn->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ if (!emit_tex2( emit, insn, tex_result ))
+ return FALSE;
+ break;
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXD:
+ if (!emit_tex3( emit, insn, tex_result ))
+ return FALSE;
+ break;
+ default:
+ assert(0);
+ }
+
+
+ if (compare) {
+ SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
+ struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
+ struct src_register one =
+ scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
+
+ /* Divide texcoord R by Q */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+ src0_zdivw,
+ scalar(src0, TGSI_SWIZZLE_W) ))
+ return FALSE;
+
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+ src0_zdivw,
+ scalar(src0, TGSI_SWIZZLE_Z),
+ src(src0_zdivw) ))
+ return FALSE;
+
+ if (!emit_select(
+ emit,
+ emit->key.fkey.tex[src1.base.num].compare_func,
+ dst,
+ src(src0_zdivw),
+ tex_src_x))
+ return FALSE;
+
+ return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask( dst, TGSI_WRITEMASK_W),
+ one );
+ }
+ else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW)
+ {
+ if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
+ struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
+ struct src_register const_int = get_loop_const( emit );
+
+ return (emit_instruction( emit, inst ) &&
+ emit_src( emit, loop_reg ) &&
+ emit_src( emit, const_int ) );
+}
+
+static boolean emit_endloop2( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
+ return emit_instruction( emit, inst );
+}
+
+static boolean emit_brk( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
+ return emit_instruction( emit, inst );
+}
+
+static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
+ unsigned opcode,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dst;
+ struct src_register src;
+
+ inst = inst_token( opcode );
+ dst = translate_dst_register( emit, insn, 0 );
+ src = translate_src_register( emit, &insn->Src[0] );
+ src = scalar( src, TGSI_SWIZZLE_X );
+
+ return submit_op1( emit, inst, dst, src );
+}
+
+
+static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
+ unsigned opcode,
+ const struct tgsi_full_instruction *insn )
+{
+ const struct tgsi_full_src_register *src = insn->Src;
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dst;
+
+ inst = inst_token( opcode );
+ dst = translate_dst_register( emit, insn, 0 );
+
+ switch (insn->Instruction.NumSrcRegs) {
+ case 0:
+ return submit_op0( emit, inst, dst );
+ case 1:
+ return submit_op1( emit, inst, dst,
+ translate_src_register( emit, &src[0] ));
+ case 2:
+ return submit_op2( emit, inst, dst,
+ translate_src_register( emit, &src[0] ),
+ translate_src_register( emit, &src[1] ) );
+ case 3:
+ return submit_op3( emit, inst, dst,
+ translate_src_register( emit, &src[0] ),
+ translate_src_register( emit, &src[1] ),
+ translate_src_register( emit, &src[2] ) );
+ default:
+ assert(0);
+ return FALSE;
+ }
+}
+
+static boolean emit_arl(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ ++emit->current_arl;
+ if (svga_arl_needs_adjustment( emit )) {
+ return emit_fake_arl( emit, insn );
+ } else {
+ /* no need to adjust, just emit straight arl */
+ return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
+ }
+}
+
+static boolean alias_src_dst( struct src_register src,
+ SVGA3dShaderDestToken dst )
+{
+ if (src.base.num != dst.num)
+ return FALSE;
+
+ if (SVGA3dShaderGetRegType(dst.value) !=
+ SVGA3dShaderGetRegType(src.base.value))
+ return FALSE;
+
+ return TRUE;
+}
+
+static boolean emit_pow(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ boolean need_tmp = FALSE;
+
+ /* POW can only output to a temporary */
+ if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
+ need_tmp = TRUE;
+
+ /* POW src1 must not be the same register as dst */
+ if (alias_src_dst( src1, dst ))
+ need_tmp = TRUE;
+
+ /* it's a scalar op */
+ src0 = scalar( src0, TGSI_SWIZZLE_X );
+ src1 = scalar( src1, TGSI_SWIZZLE_X );
+
+ if (need_tmp) {
+ SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
+
+ if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
+ return FALSE;
+
+ return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
+ }
+ else {
+ return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
+ }
+}
+
+static boolean emit_xpd(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ boolean need_dst_tmp = FALSE;
+
+ /* XPD can only output to a temporary */
+ if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
+ need_dst_tmp = TRUE;
+
+ /* The dst reg must not be the same as src0 or src1*/
+ if (alias_src_dst(src0, dst) ||
+ alias_src_dst(src1, dst))
+ need_dst_tmp = TRUE;
+
+ if (need_dst_tmp) {
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ /* Obey DX9 restrictions on mask:
+ */
+ tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
+
+ if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
+ return FALSE;
+
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+ return FALSE;
+ }
+ else {
+ if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
+ return FALSE;
+ }
+
+ /* Need to emit 1.0 to dst.w?
+ */
+ if (dst.mask & TGSI_WRITEMASK_W) {
+ struct src_register zero = get_zero_immediate( emit );
+
+ if (!submit_op1(emit,
+ inst_token( SVGA3DOP_MOV ),
+ writemask(dst, TGSI_WRITEMASK_W),
+ zero))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_lrp(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ SVGA3dShaderDestToken tmp;
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ const struct src_register src2 = translate_src_register(
+ emit, &insn->Src[2] );
+ boolean need_dst_tmp = FALSE;
+
+ /* The dst reg must not be the same as src0 or src2 */
+ if (alias_src_dst(src0, dst) ||
+ alias_src_dst(src2, dst))
+ need_dst_tmp = TRUE;
+
+ if (need_dst_tmp) {
+ tmp = get_temp( emit );
+ tmp.mask = dst.mask;
+ }
+ else {
+ tmp = dst;
+ }
+
+ if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
+ return FALSE;
+
+ if (need_dst_tmp) {
+ if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_dst_insn(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
+ */
+ return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
+ }
+ else {
+
+ /* result[0] = 1 * 1;
+ * result[1] = a[1] * b[1];
+ * result[2] = a[2] * 1;
+ * result[3] = 1 * b[3];
+ */
+
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ SVGA3dShaderDestToken tmp;
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ const struct src_register src1 = translate_src_register(
+ emit, &insn->Src[1] );
+ struct src_register zero = get_zero_immediate( emit );
+ boolean need_tmp = FALSE;
+
+ if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+ alias_src_dst(src0, dst) ||
+ alias_src_dst(src1, dst))
+ need_tmp = TRUE;
+
+ if (need_tmp) {
+ tmp = get_temp( emit );
+ }
+ else {
+ tmp = dst;
+ }
+
+ /* tmp.xw = 1.0
+ */
+ if (tmp.mask & TGSI_WRITEMASK_XW) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask(tmp, TGSI_WRITEMASK_XW ),
+ scalar( zero, 3 )))
+ return FALSE;
+ }
+
+ /* tmp.yz = src0
+ */
+ if (tmp.mask & TGSI_WRITEMASK_YZ) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask(tmp, TGSI_WRITEMASK_YZ ),
+ src0))
+ return FALSE;
+ }
+
+ /* tmp.yw = tmp * src1
+ */
+ if (tmp.mask & TGSI_WRITEMASK_YW) {
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+ writemask(tmp, TGSI_WRITEMASK_YW ),
+ src(tmp),
+ src1))
+ return FALSE;
+ }
+
+ /* dst = tmp
+ */
+ if (need_tmp) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ dst,
+ src(tmp)))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_exp(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 =
+ translate_src_register( emit, &insn->Src[0] );
+ struct src_register zero = get_zero_immediate( emit );
+ SVGA3dShaderDestToken fraction;
+
+ if (dst.mask & TGSI_WRITEMASK_Y)
+ fraction = dst;
+ else if (dst.mask & TGSI_WRITEMASK_X)
+ fraction = get_temp( emit );
+
+ /* If y is being written, fill it with src0 - floor(src0).
+ */
+ if (dst.mask & TGSI_WRITEMASK_XY) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+ writemask( fraction, TGSI_WRITEMASK_Y ),
+ src0 ))
+ return FALSE;
+ }
+
+ /* If x is being written, fill it with 2 ^ floor(src0).
+ */
+ if (dst.mask & TGSI_WRITEMASK_X) {
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+ writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
+ src0,
+ scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
+ return FALSE;
+
+ if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+ writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
+ scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
+ return FALSE;
+
+ if (!(dst.mask & TGSI_WRITEMASK_Y))
+ release_temp( emit, fraction );
+ }
+
+ /* If z is being written, fill it with 2 ^ src0 (partial precision).
+ */
+ if (dst.mask & TGSI_WRITEMASK_Z) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
+ writemask( dst, dst.mask & TGSI_WRITEMASK_Z ),
+ src0 ) )
+ return FALSE;
+ }
+
+ /* If w is being written, fill it with one.
+ */
+ if (dst.mask & TGSI_WRITEMASK_W) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask(dst, TGSI_WRITEMASK_W),
+ scalar( zero, TGSI_SWIZZLE_W ) ))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean emit_lit(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
+ */
+ return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
+ }
+ else {
+
+ /* D3D vs. GL semantics can be fairly easily accomodated by
+ * variations on this sequence.
+ *
+ * GL:
+ * tmp.y = src.x
+ * tmp.z = pow(src.y,src.w)
+ * p0 = src0.xxxx > 0
+ * result = zero.wxxw
+ * (p0) result.yz = tmp
+ *
+ * D3D:
+ * tmp.y = src.x
+ * tmp.z = pow(src.y,src.w)
+ * p0 = src0.xxyy > 0
+ * result = zero.wxxw
+ * (p0) result.yz = tmp
+ *
+ * Will implement the GL version for now.
+ */
+
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+ const struct src_register src0 = translate_src_register(
+ emit, &insn->Src[0] );
+ struct src_register zero = get_zero_immediate( emit );
+
+ /* tmp = pow(src.y, src.w)
+ */
+ if (dst.mask & TGSI_WRITEMASK_Z) {
+ if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
+ tmp,
+ scalar(src0, 1),
+ scalar(src0, 3)))
+ return FALSE;
+ }
+
+ /* tmp.y = src.x
+ */
+ if (dst.mask & TGSI_WRITEMASK_Y) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask(tmp, TGSI_WRITEMASK_Y ),
+ scalar(src0, 0)))
+ return FALSE;
+ }
+
+ /* Can't quite do this with emit conditional due to the extra
+ * writemask on the predicated mov:
+ */
+ {
+ SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+ SVGA3dShaderInstToken setp_token, mov_token;
+ struct src_register predsrc;
+
+ setp_token = inst_token( SVGA3DOP_SETP );
+ mov_token = inst_token( SVGA3DOP_MOV );
+
+ setp_token.control = SVGA3DOPCOMP_GT;
+
+ /* D3D vs GL semantics:
+ */
+ if (0)
+ predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
+ else
+ predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
+
+ /* SETP src0.xxyy, GT, {0}.x */
+ if (!submit_op2( emit, setp_token, pred_reg,
+ predsrc,
+ swizzle(zero, 0, 0, 0, 0) ))
+ return FALSE;
+
+ /* MOV dst, fail */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
+ swizzle(zero, 3, 0, 0, 3 )))
+ return FALSE;
+
+ /* MOV dst.yz, tmp (predicated)
+ *
+ * Note that the predicate reg (and possible modifiers) is passed
+ * as the first source argument.
+ */
+ if (dst.mask & TGSI_WRITEMASK_YZ) {
+ mov_token.predicated = 1;
+ if (!submit_op2( emit, mov_token,
+ writemask(dst, TGSI_WRITEMASK_YZ),
+ src( pred_reg ), src( tmp ) ))
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+
+
+static boolean emit_ex2( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ SVGA3dShaderInstToken inst;
+ SVGA3dShaderDestToken dst;
+ struct src_register src0;
+
+ inst = inst_token( SVGA3DOP_EXP );
+ dst = translate_dst_register( emit, insn, 0 );
+ src0 = translate_src_register( emit, &insn->Src[0] );
+ src0 = scalar( src0, TGSI_SWIZZLE_X );
+
+ if (dst.mask != TGSI_WRITEMASK_XYZW) {
+ SVGA3dShaderDestToken tmp = get_temp( emit );
+
+ if (!submit_op1( emit, inst, tmp, src0 ))
+ return FALSE;
+
+ return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ dst,
+ scalar( src( tmp ), TGSI_SWIZZLE_X ) );
+ }
+
+ return submit_op1( emit, inst, dst, src0 );
+}
+
+
+static boolean emit_log(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+ struct src_register src0 =
+ translate_src_register( emit, &insn->Src[0] );
+ struct src_register zero = get_zero_immediate( emit );
+ SVGA3dShaderDestToken abs_tmp;
+ struct src_register abs_src0;
+ SVGA3dShaderDestToken log2_abs;
+
+ if (dst.mask & TGSI_WRITEMASK_Z)
+ log2_abs = dst;
+ else if (dst.mask & TGSI_WRITEMASK_XY)
+ log2_abs = get_temp( emit );
+
+ /* If z is being written, fill it with log2( abs( src0 ) ).
+ */
+ if (dst.mask & TGSI_WRITEMASK_XYZ) {
+ if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
+ abs_src0 = src0;
+ else {
+ abs_tmp = get_temp( emit );
+
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ abs_tmp,
+ src0 ) )
+ return FALSE;
+
+ abs_src0 = src( abs_tmp );
+ }
+
+ abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
+
+ if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
+ writemask( log2_abs, TGSI_WRITEMASK_Z ),
+ abs_src0 ) )
+ return FALSE;
+ }
+
+ if (dst.mask & TGSI_WRITEMASK_XY) {
+ SVGA3dShaderDestToken floor_log2;
+
+ if (dst.mask & TGSI_WRITEMASK_X)
+ floor_log2 = dst;
+ else
+ floor_log2 = get_temp( emit );
+
+ /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
+ */
+ if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+ writemask( floor_log2, TGSI_WRITEMASK_X ),
+ scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
+ return FALSE;
+
+ if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+ writemask( floor_log2, TGSI_WRITEMASK_X ),
+ scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
+ negate( src( floor_log2 ) ) ) )
+ return FALSE;
+
+ /* If y is being written, fill it with
+ * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
+ */
+ if (dst.mask & TGSI_WRITEMASK_Y) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+ writemask( dst, TGSI_WRITEMASK_Y ),
+ negate( scalar( src( floor_log2 ),
+ TGSI_SWIZZLE_X ) ) ) )
+ return FALSE;
+
+ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+ writemask( dst, TGSI_WRITEMASK_Y ),
+ src( dst ),
+ abs_src0 ) )
+ return FALSE;
+ }
+
+ if (!(dst.mask & TGSI_WRITEMASK_X))
+ release_temp( emit, floor_log2 );
+
+ if (!(dst.mask & TGSI_WRITEMASK_Z))
+ release_temp( emit, log2_abs );
+ }
+
+ if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
+ src0.base.srcMod != SVGA3DSRCMOD_ABS)
+ release_temp( emit, abs_tmp );
+
+ /* If w is being written, fill it with one.
+ */
+ if (dst.mask & TGSI_WRITEMASK_W) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+ writemask(dst, TGSI_WRITEMASK_W),
+ scalar( zero, TGSI_SWIZZLE_W ) ))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_bgnsub( struct svga_shader_emitter *emit,
+ unsigned position,
+ const struct tgsi_full_instruction *insn )
+{
+ unsigned i;
+
+ /* Note that we've finished the main function and are now emitting
+ * subroutines. This affects how we terminate the generated
+ * shader.
+ */
+ emit->in_main_func = FALSE;
+
+ for (i = 0; i < emit->nr_labels; i++) {
+ if (emit->label[i] == position) {
+ return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
+ emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
+ emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
+ }
+ }
+
+ assert(0);
+ return TRUE;
+}
+
+static boolean emit_call( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn )
+{
+ unsigned position = insn->Label.Label;
+ unsigned i;
+
+ for (i = 0; i < emit->nr_labels; i++) {
+ if (emit->label[i] == position)
+ break;
+ }
+
+ if (emit->nr_labels == Elements(emit->label))
+ return FALSE;
+
+ if (i == emit->nr_labels) {
+ emit->label[i] = position;
+ emit->nr_labels++;
+ }
+
+ return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
+ emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
+}
+
+
+static boolean emit_end( struct svga_shader_emitter *emit )
+{
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ return emit_vs_postamble( emit );
+ }
+ else {
+ return emit_ps_postamble( emit );
+ }
+}
+
+
+
+static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
+ unsigned position,
+ const struct tgsi_full_instruction *insn )
+{
+ switch (insn->Instruction.Opcode) {
+
+ case TGSI_OPCODE_ARL:
+ return emit_arl( emit, insn );
+
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXD:
+ return emit_tex( emit, insn );
+
+ case TGSI_OPCODE_BGNSUB:
+ return emit_bgnsub( emit, position, insn );
+
+ case TGSI_OPCODE_ENDSUB:
+ return TRUE;
+
+ case TGSI_OPCODE_CAL:
+ return emit_call( emit, insn );
+
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */
+ return emit_floor( emit, insn );
+
+ case TGSI_OPCODE_CMP:
+ return emit_cmp( emit, insn );
+
+ case TGSI_OPCODE_DIV:
+ return emit_div( emit, insn );
+
+ case TGSI_OPCODE_DP2:
+ return emit_dp2( emit, insn );
+
+ case TGSI_OPCODE_DPH:
+ return emit_dph( emit, insn );
+
+ case TGSI_OPCODE_NRM:
+ return emit_nrm( emit, insn );
+
+ case TGSI_OPCODE_COS:
+ return emit_cos( emit, insn );
+
+ case TGSI_OPCODE_SIN:
+ return emit_sin( emit, insn );
+
+ case TGSI_OPCODE_SCS:
+ return emit_sincos( emit, insn );
+
+ case TGSI_OPCODE_END:
+ /* TGSI always finishes the main func with an END */
+ return emit_end( emit );
+
+ case TGSI_OPCODE_KIL:
+ return emit_kil( emit, insn );
+
+ /* Selection opcodes. The underlying language is fairly
+ * non-orthogonal about these.
+ */
+ case TGSI_OPCODE_SEQ:
+ return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
+
+ case TGSI_OPCODE_SNE:
+ return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
+
+ case TGSI_OPCODE_SGT:
+ return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
+
+ case TGSI_OPCODE_SGE:
+ return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
+
+ case TGSI_OPCODE_SLT:
+ return emit_select_op( emit, PIPE_FUNC_LESS, insn );
+
+ case TGSI_OPCODE_SLE:
+ return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
+
+ case TGSI_OPCODE_SUB:
+ return emit_sub( emit, insn );
+
+ case TGSI_OPCODE_POW:
+ return emit_pow( emit, insn );
+
+ case TGSI_OPCODE_EX2:
+ return emit_ex2( emit, insn );
+
+ case TGSI_OPCODE_EXP:
+ return emit_exp( emit, insn );
+
+ case TGSI_OPCODE_LOG:
+ return emit_log( emit, insn );
+
+ case TGSI_OPCODE_LG2:
+ return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
+
+ case TGSI_OPCODE_RSQ:
+ return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
+
+ case TGSI_OPCODE_RCP:
+ return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
+
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_RET:
+ /* This is a noop -- we tell mesa that we can't support RET
+ * within a function (early return), so this will always be
+ * followed by an ENDSUB.
+ */
+ return TRUE;
+
+ /* These aren't actually used by any of the frontends we care
+ * about:
+ */
+ case TGSI_OPCODE_CLAMP:
+ case TGSI_OPCODE_ROUND:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_SHL:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_XOR:
+ return FALSE;
+
+ case TGSI_OPCODE_IF:
+ return emit_if( emit, insn );
+ case TGSI_OPCODE_ELSE:
+ return emit_else( emit, insn );
+ case TGSI_OPCODE_ENDIF:
+ return emit_endif( emit, insn );
+
+ case TGSI_OPCODE_BGNLOOP:
+ return emit_bgnloop2( emit, insn );
+ case TGSI_OPCODE_ENDLOOP:
+ return emit_endloop2( emit, insn );
+ case TGSI_OPCODE_BRK:
+ return emit_brk( emit, insn );
+
+ case TGSI_OPCODE_XPD:
+ return emit_xpd( emit, insn );
+
+ case TGSI_OPCODE_KILP:
+ return emit_kilp( emit, insn );
+
+ case TGSI_OPCODE_DST:
+ return emit_dst_insn( emit, insn );
+
+ case TGSI_OPCODE_LIT:
+ return emit_lit( emit, insn );
+
+ case TGSI_OPCODE_LRP:
+ return emit_lrp( emit, insn );
+
+ default: {
+ unsigned opcode = translate_opcode(insn->Instruction.Opcode);
+
+ if (opcode == SVGA3DOP_LAST_INST)
+ return FALSE;
+
+ if (!emit_simple_instruction( emit, opcode, insn ))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
+ struct tgsi_full_immediate *imm)
+{
+ static const float id[4] = {0,0,0,1};
+ float value[4];
+ unsigned i;
+
+ assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
+ for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
+ value[i] = imm->u[i].Float;
+
+ for ( ; i < 4; i++ )
+ value[i] = id[i];
+
+ return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+ emit->imm_start + emit->internal_imm_count++,
+ value[0], value[1], value[2], value[3]);
+}
+
+static boolean make_immediate( struct svga_shader_emitter *emit,
+ float a,
+ float b,
+ float c,
+ float d,
+ struct src_register *out )
+{
+ unsigned idx = emit->nr_hw_const++;
+
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+ idx, a, b, c, d ))
+ return FALSE;
+
+ *out = src_register( SVGA3DREG_CONST, idx );
+
+ return TRUE;
+}
+
+static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
+{
+ if (!emit->key.vkey.need_prescale) {
+ if (!make_immediate( emit, 0, 0, .5, .5,
+ &emit->imm_0055))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
+{
+ unsigned i;
+
+ /* For SM20, need to initialize the temporaries we're using to hold
+ * color outputs to some value. Shaders which don't set all of
+ * these values are likely to be rejected by the DX9 runtime.
+ */
+ if (!emit->use_sm30) {
+ struct src_register zero = get_zero_immediate( emit );
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->temp_col[i],
+ zero ))
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
+{
+ unsigned i;
+
+ /* PS oDepth is incredibly fragile and it's very hard to catch the
+ * types of usage that break it during shader emit. Easier just to
+ * redirect the main program to a temporary and then only touch
+ * oDepth with a hand-crafted MOV below.
+ */
+ if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->true_pos,
+ scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
+ return FALSE;
+ }
+
+ /* Similarly for SM20 color outputs... Luckily SM30 isn't so
+ * fragile.
+ */
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->true_col[i],
+ src(emit->temp_col[i]) ))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
+{
+ /* PSIZ output is incredibly fragile and it's very hard to catch
+ * the types of usage that break it during shader emit. Easier
+ * just to redirect the main program to a temporary and then only
+ * touch PSIZ with a hand-crafted MOV below.
+ */
+ if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ emit->true_psiz,
+ scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
+ return FALSE;
+ }
+
+ /* Need to perform various manipulations on vertex position to cope
+ * with the different GL and D3D clip spaces.
+ */
+ if (emit->key.vkey.need_prescale) {
+ SVGA3dShaderDestToken temp_pos = emit->temp_pos;
+ SVGA3dShaderDestToken pos = emit->true_pos;
+ unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
+ offset + 0 );
+ struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
+ offset + 1 );
+
+ /* MUL temp_pos.xyz, temp_pos, prescale.scale
+ * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
+ * --> Note that prescale.trans.w == 0
+ */
+ if (!submit_op2( emit,
+ inst_token(SVGA3DOP_MUL),
+ writemask(temp_pos, TGSI_WRITEMASK_XYZ),
+ src(temp_pos),
+ prescale_scale ))
+ return FALSE;
+
+ if (!submit_op3( emit,
+ inst_token(SVGA3DOP_MAD),
+ pos,
+ swizzle(src(temp_pos), 3, 3, 3, 3),
+ prescale_trans,
+ src(temp_pos)))
+ return FALSE;
+ }
+ else {
+ SVGA3dShaderDestToken temp_pos = emit->temp_pos;
+ SVGA3dShaderDestToken pos = emit->true_pos;
+ struct src_register imm_0055 = emit->imm_0055;
+
+ /* Adjust GL clipping coordinate space to hardware (D3D-style):
+ *
+ * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
+ * MOV result.position, temp_pos
+ */
+ if (!submit_op2( emit,
+ inst_token(SVGA3DOP_DP4),
+ writemask(temp_pos, TGSI_WRITEMASK_Z),
+ imm_0055,
+ src(temp_pos) ))
+ return FALSE;
+
+ if (!submit_op1( emit,
+ inst_token(SVGA3DOP_MOV),
+ pos,
+ src(temp_pos) ))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/*
+ 0: IF VFACE :4
+ 1: COLOR = FrontColor;
+ 2: ELSE
+ 3: COLOR = BackColor;
+ 4: ENDIF
+ */
+static boolean emit_light_twoside( struct svga_shader_emitter *emit )
+{
+ struct src_register vface, zero;
+ struct src_register front[2];
+ struct src_register back[2];
+ SVGA3dShaderDestToken color[2];
+ int count = emit->internal_color_count;
+ int i;
+ SVGA3dShaderInstToken if_token;
+
+ if (count == 0)
+ return TRUE;
+
+ vface = get_vface( emit );
+ zero = get_zero_immediate( emit );
+
+ /* Can't use get_temp() to allocate the color reg as such
+ * temporaries will be reclaimed after each instruction by the call
+ * to reset_temp_regs().
+ */
+ for (i = 0; i < count; i++) {
+ color[i] = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+
+ front[i] = emit->input_map[emit->internal_color_idx[i]];
+
+ /* Back is always the next input:
+ */
+ back[i] = front[i];
+ back[i].base.num = front[i].base.num + 1;
+
+ /* Reassign the input_map to the actual front-face color:
+ */
+ emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
+ }
+
+ if_token = inst_token( SVGA3DOP_IFC );
+
+ if (emit->key.fkey.front_cw)
+ if_token.control = SVGA3DOPCOMP_GT;
+ else
+ if_token.control = SVGA3DOPCOMP_LT;
+
+ zero = scalar(zero, TGSI_SWIZZLE_X);
+
+ if (!(emit_instruction( emit, if_token ) &&
+ emit_src( emit, vface ) &&
+ emit_src( emit, zero ) ))
+ return FALSE;
+
+ for (i = 0; i < count; i++) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
+ return FALSE;
+ }
+
+ if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
+ return FALSE;
+
+ for (i = 0; i < count; i++) {
+ if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
+ return FALSE;
+ }
+
+ if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
+ return FALSE;
+
+ return TRUE;
+}
+
+/*
+ 0: SETP_GT TEMP, VFACE, 0
+ where TEMP is a fake frontface register
+ */
+static boolean emit_frontface( struct svga_shader_emitter *emit )
+{
+ struct src_register vface, zero;
+ SVGA3dShaderDestToken temp;
+ struct src_register pass, fail;
+
+ vface = get_vface( emit );
+ zero = get_zero_immediate( emit );
+
+ /* Can't use get_temp() to allocate the fake frontface reg as such
+ * temporaries will be reclaimed after each instruction by the call
+ * to reset_temp_regs().
+ */
+ temp = dst_register( SVGA3DREG_TEMP,
+ emit->nr_hw_temp++ );
+
+ if (emit->key.fkey.front_cw) {
+ pass = scalar( zero, TGSI_SWIZZLE_W );
+ fail = scalar( zero, TGSI_SWIZZLE_X );
+ } else {
+ pass = scalar( zero, TGSI_SWIZZLE_X );
+ fail = scalar( zero, TGSI_SWIZZLE_W );
+ }
+
+ if (!emit_conditional(emit, PIPE_FUNC_GREATER,
+ temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
+ pass, fail))
+ return FALSE;
+
+ /* Reassign the input_map to the actual front-face color:
+ */
+ emit->input_map[emit->internal_frontface_idx] = src(temp);
+
+ return TRUE;
+}
+
+static INLINE boolean
+needs_to_create_zero( struct svga_shader_emitter *emit )
+{
+ int i;
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ if (!emit->use_sm30)
+ return TRUE;
+
+ if (emit->key.fkey.light_twoside)
+ return TRUE;
+
+ if (emit->emit_frontface)
+ return TRUE;
+
+ if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
+ return TRUE;
+ }
+
+ if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
+ return TRUE;
+
+ for (i = 0; i < emit->key.fkey.num_textures; i++) {
+ if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static INLINE boolean
+needs_to_create_loop_const( struct svga_shader_emitter *emit )
+{
+ return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
+}
+
+static INLINE boolean
+needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
+{
+ return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
+ emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
+}
+
+static INLINE boolean
+needs_to_create_arl_consts( struct svga_shader_emitter *emit )
+{
+ return (emit->num_arl_consts > 0);
+}
+
+static INLINE boolean
+pre_parse_add_indirect( struct svga_shader_emitter *emit,
+ int num, int current_arl)
+{
+ int i;
+ assert(num < 0);
+
+ for (i = 0; i < emit->num_arl_consts; ++i) {
+ if (emit->arl_consts[i].arl_num == current_arl)
+ break;
+ }
+ /* new entry */
+ if (emit->num_arl_consts == i) {
+ ++emit->num_arl_consts;
+ }
+ emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
+ num :
+ emit->arl_consts[i].number;
+ emit->arl_consts[i].arl_num = current_arl;
+ return TRUE;
+}
+
+static boolean
+pre_parse_instruction( struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn,
+ int current_arl)
+{
+ if (insn->Src[0].Register.Indirect &&
+ insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
+ const struct tgsi_full_src_register *reg = &insn->Src[0];
+ if (reg->Register.Index < 0) {
+ pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
+ }
+ }
+
+ if (insn->Src[1].Register.Indirect &&
+ insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
+ const struct tgsi_full_src_register *reg = &insn->Src[1];
+ if (reg->Register.Index < 0) {
+ pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
+ }
+ }
+
+ if (insn->Src[2].Register.Indirect &&
+ insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
+ const struct tgsi_full_src_register *reg = &insn->Src[2];
+ if (reg->Register.Index < 0) {
+ pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
+ }
+ }
+
+ return TRUE;
+}
+
+static boolean
+pre_parse_tokens( struct svga_shader_emitter *emit,
+ const struct tgsi_token *tokens )
+{
+ struct tgsi_parse_context parse;
+ int current_arl = 0;
+
+ tgsi_parse_init( &parse, tokens );
+
+ while (!tgsi_parse_end_of_tokens( &parse )) {
+ tgsi_parse_token( &parse );
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (parse.FullToken.FullInstruction.Instruction.Opcode ==
+ TGSI_OPCODE_ARL) {
+ ++current_arl;
+ }
+ if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
+ current_arl ))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+
+ }
+ return TRUE;
+}
+
+static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
+
+{
+ if (needs_to_create_zero( emit )) {
+ create_zero_immediate( emit );
+ }
+ if (needs_to_create_loop_const( emit )) {
+ create_loop_const( emit );
+ }
+ if (needs_to_create_sincos_consts( emit )) {
+ create_sincos_consts( emit );
+ }
+ if (needs_to_create_arl_consts( emit )) {
+ create_arl_consts( emit );
+ }
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ if (!emit_ps_preamble( emit ))
+ return FALSE;
+
+ if (emit->key.fkey.light_twoside) {
+ if (!emit_light_twoside( emit ))
+ return FALSE;
+ }
+ if (emit->emit_frontface) {
+ if (!emit_frontface( emit ))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+ const struct tgsi_token *tokens )
+{
+ struct tgsi_parse_context parse;
+ boolean ret = TRUE;
+ boolean helpers_emitted = FALSE;
+ unsigned line_nr = 0;
+
+ tgsi_parse_init( &parse, tokens );
+ emit->internal_imm_count = 0;
+
+ if (emit->unit == PIPE_SHADER_VERTEX) {
+ ret = emit_vs_preamble( emit );
+ if (!ret)
+ goto done;
+ }
+
+ pre_parse_tokens(emit, tokens);
+
+ while (!tgsi_parse_end_of_tokens( &parse )) {
+ tgsi_parse_token( &parse );
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
+ if (!ret)
+ goto done;
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ if (emit->use_sm30)
+ ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
+ else
+ ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
+ if (!ret)
+ goto done;
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (!helpers_emitted) {
+ if (!svga_shader_emit_helpers( emit ))
+ goto done;
+ helpers_emitted = TRUE;
+ }
+ ret = svga_emit_instruction( emit,
+ line_nr++,
+ &parse.FullToken.FullInstruction );
+ if (!ret)
+ goto done;
+ break;
+ default:
+ break;
+ }
+
+ reset_temp_regs( emit );
+ }
+
+ /* Need to terminate the current subroutine. Note that the
+ * hardware doesn't tolerate shaders without sub-routines
+ * terminating with RET+END.
+ */
+ if (!emit->in_main_func) {
+ ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
+ if (!ret)
+ goto done;
+ }
+
+ /* Need to terminate the whole shader:
+ */
+ ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
+ if (!ret)
+ goto done;
+
+done:
+ assert(ret);
+ tgsi_parse_free( &parse );
+ return ret;
+}
+
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
new file mode 100644
index 00000000000..59f299c1858
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -0,0 +1,299 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * VMware SVGA specific winsys interface.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Documentation taken from the VMware SVGA DDK.
+ */
+
+#ifndef SVGA_WINSYS_H_
+#define SVGA_WINSYS_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_buffer;
+struct pipe_screen;
+struct pipe_context;
+struct pipe_fence_handle;
+struct pipe_texture;
+struct svga_region;
+
+
+#define SVGA_BUFFER_USAGE_PINNED (PIPE_BUFFER_USAGE_CUSTOM << 0)
+#define SVGA_BUFFER_USAGE_WRAPPED (PIPE_BUFFER_USAGE_CUSTOM << 1)
+
+
+/** Opaque surface handle */
+struct svga_winsys_surface;
+
+/** Opaque buffer handle */
+struct svga_winsys_handle;
+
+
+/**
+ * SVGA per-context winsys interface.
+ */
+struct svga_winsys_context
+{
+ void
+ (*destroy)(struct svga_winsys_context *swc);
+
+ void *
+ (*reserve)(struct svga_winsys_context *swc,
+ uint32_t nr_bytes, uint32_t nr_relocs );
+
+ /**
+ * Emit a relocation for a host surface.
+ *
+ * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+ *
+ * NOTE: Order of this call does matter. It should be the same order
+ * as relocations appear in the command buffer.
+ */
+ void
+ (*surface_relocation)(struct svga_winsys_context *swc,
+ uint32 *sid,
+ struct svga_winsys_surface *surface,
+ unsigned flags);
+
+ /**
+ * Emit a relocation for a guest memory region.
+ *
+ * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+ *
+ * NOTE: Order of this call does matter. It should be the same order
+ * as relocations appear in the command buffer.
+ */
+ void
+ (*region_relocation)(struct svga_winsys_context *swc,
+ struct SVGAGuestPtr *ptr,
+ struct svga_winsys_buffer *buffer,
+ uint32 offset,
+ unsigned flags);
+
+ void
+ (*commit)(struct svga_winsys_context *swc);
+
+ enum pipe_error
+ (*flush)(struct svga_winsys_context *swc,
+ struct pipe_fence_handle **pfence);
+
+ /**
+ * Context ID used to fill in the commands
+ *
+ * Context IDs are arbitrary small non-negative integers,
+ * global to the entire SVGA device.
+ */
+ uint32 cid;
+};
+
+
+/**
+ * SVGA per-screen winsys interface.
+ */
+struct svga_winsys_screen
+{
+ void
+ (*destroy)(struct svga_winsys_screen *sws);
+
+ boolean
+ (*get_cap)(struct svga_winsys_screen *sws,
+ SVGA3dDevCapIndex index,
+ SVGA3dDevCapResult *result);
+
+ /**
+ * Create a new context.
+ *
+ * Context objects encapsulate all render state, and shader
+ * objects are per-context.
+ *
+ * Surfaces are not per-context. The same surface can be shared
+ * between multiple contexts, and surface operations can occur
+ * without a context.
+ */
+ struct svga_winsys_context *
+ (*context_create)(struct svga_winsys_screen *sws);
+
+
+ /**
+ * This creates a "surface" object in the SVGA3D device,
+ * and returns the surface ID (sid). Surfaces are generic
+ * containers for host VRAM objects like textures, vertex
+ * buffers, and depth/stencil buffers.
+ *
+ * Surfaces are hierarchial:
+ *
+ * - Surface may have multiple faces (for cube maps)
+ *
+ * - Each face has a list of mipmap levels
+ *
+ * - Each mipmap image may have multiple volume
+ * slices, if the image is three dimensional.
+ *
+ * - Each slice is a 2D array of 'blocks'
+ *
+ * - Each block may be one or more pixels.
+ * (Usually 1, more for DXT or YUV formats.)
+ *
+ * Surfaces are generic host VRAM objects. The SVGA3D device
+ * may optimize surfaces according to the format they were
+ * created with, but this format does not limit the ways in
+ * which the surface may be used. For example, a depth surface
+ * can be used as a texture, or a floating point image may
+ * be used as a vertex buffer. Some surface usages may be
+ * lower performance, due to software emulation, but any
+ * usage should work with any surface.
+ */
+ struct svga_winsys_surface *
+ (*surface_create)(struct svga_winsys_screen *sws,
+ SVGA3dSurfaceFlags flags,
+ SVGA3dSurfaceFormat format,
+ SVGA3dSize size,
+ uint32 numFaces,
+ uint32 numMipLevels);
+
+ /**
+ * Whether this surface is sitting in a validate list
+ */
+ boolean
+ (*surface_is_flushed)(struct svga_winsys_screen *sws,
+ struct svga_winsys_surface *surface);
+
+ /**
+ * Reference a SVGA3D surface object. This allows sharing of a
+ * surface between different objects.
+ */
+ void
+ (*surface_reference)(struct svga_winsys_screen *sws,
+ struct svga_winsys_surface **pdst,
+ struct svga_winsys_surface *src);
+
+ /**
+ * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+ *
+ * Remember that gallium gets to choose the interface it needs, and the
+ * window systems must then implement that interface (rather than the
+ * other way around...).
+ *
+ * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
+ * usage argument is only an optimization hint, not a guarantee, therefore
+ * proper behavior must be observed in all circumstances.
+ *
+ * alignment indicates the client's alignment requirements, eg for
+ * SSE instructions.
+ */
+ struct svga_winsys_buffer *
+ (*buffer_create)( struct svga_winsys_screen *sws,
+ unsigned alignment,
+ unsigned usage,
+ unsigned size );
+
+ /**
+ * Map the entire data store of a buffer object into the client's address.
+ * flags is a bitmask of:
+ * - PIPE_BUFFER_USAGE_CPU_READ/WRITE
+ * - PIPE_BUFFER_USAGE_DONTBLOCK
+ * - PIPE_BUFFER_USAGE_UNSYNCHRONIZED
+ */
+ void *
+ (*buffer_map)( struct svga_winsys_screen *sws,
+ struct svga_winsys_buffer *buf,
+ unsigned usage );
+
+ void
+ (*buffer_unmap)( struct svga_winsys_screen *sws,
+ struct svga_winsys_buffer *buf );
+
+ void
+ (*buffer_destroy)( struct svga_winsys_screen *sws,
+ struct svga_winsys_buffer *buf );
+
+
+ /**
+ * Reference a fence object.
+ */
+ void
+ (*fence_reference)( struct svga_winsys_screen *sws,
+ struct pipe_fence_handle **pdst,
+ struct pipe_fence_handle *src );
+
+ /**
+ * Checks whether the fence has been signalled.
+ * \param flags driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_signalled)( struct svga_winsys_screen *sws,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
+ /**
+ * Wait for the fence to finish.
+ * \param flags driver-specific meaning
+ * \return zero on success.
+ */
+ int (*fence_finish)( struct svga_winsys_screen *sws,
+ struct pipe_fence_handle *fence,
+ unsigned flag );
+
+};
+
+
+struct pipe_context *
+svga_context_create(struct pipe_screen *screen);
+
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws);
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen);
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+ enum SVGA3dSurfaceFormat format,
+ struct svga_winsys_surface *srf);
+
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture);
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer);
+
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+ struct pipe_buffer **buffer,
+ unsigned *stride);
+
+#endif /* SVGA_WINSYS_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
new file mode 100644
index 00000000000..d59fb89a58c
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -0,0 +1,1784 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Dump SVGA commands.
+ *
+ * Generated automatically from svga3d_reg.h by svga_dump.py.
+ */
+
+#include "svga_types.h"
+#include "svga_shader_dump.h"
+#include "svga3d_reg.h"
+
+#include "util/u_debug.h"
+#include "svga_dump.h"
+
+static void
+dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd)
+{
+ switch((*cmd).identity.type) {
+ case SVGA3D_DECLTYPE_FLOAT1:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n");
+ break;
+ case SVGA3D_DECLTYPE_FLOAT2:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n");
+ break;
+ case SVGA3D_DECLTYPE_FLOAT3:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n");
+ break;
+ case SVGA3D_DECLTYPE_FLOAT4:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n");
+ break;
+ case SVGA3D_DECLTYPE_D3DCOLOR:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n");
+ break;
+ case SVGA3D_DECLTYPE_UBYTE4:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n");
+ break;
+ case SVGA3D_DECLTYPE_SHORT2:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n");
+ break;
+ case SVGA3D_DECLTYPE_SHORT4:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n");
+ break;
+ case SVGA3D_DECLTYPE_UBYTE4N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n");
+ break;
+ case SVGA3D_DECLTYPE_SHORT2N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n");
+ break;
+ case SVGA3D_DECLTYPE_SHORT4N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n");
+ break;
+ case SVGA3D_DECLTYPE_USHORT2N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n");
+ break;
+ case SVGA3D_DECLTYPE_USHORT4N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n");
+ break;
+ case SVGA3D_DECLTYPE_UDEC3:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n");
+ break;
+ case SVGA3D_DECLTYPE_DEC3N:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n");
+ break;
+ case SVGA3D_DECLTYPE_FLOAT16_2:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n");
+ break;
+ case SVGA3D_DECLTYPE_FLOAT16_4:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n");
+ break;
+ case SVGA3D_DECLTYPE_MAX:
+ _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type);
+ break;
+ }
+ switch((*cmd).identity.method) {
+ case SVGA3D_DECLMETHOD_DEFAULT:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n");
+ break;
+ case SVGA3D_DECLMETHOD_PARTIALU:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n");
+ break;
+ case SVGA3D_DECLMETHOD_PARTIALV:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n");
+ break;
+ case SVGA3D_DECLMETHOD_CROSSUV:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n");
+ break;
+ case SVGA3D_DECLMETHOD_UV:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n");
+ break;
+ case SVGA3D_DECLMETHOD_LOOKUP:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n");
+ break;
+ case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED:
+ _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n");
+ break;
+ default:
+ _debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method);
+ break;
+ }
+ switch((*cmd).identity.usage) {
+ case SVGA3D_DECLUSAGE_POSITION:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n");
+ break;
+ case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n");
+ break;
+ case SVGA3D_DECLUSAGE_BLENDINDICES:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n");
+ break;
+ case SVGA3D_DECLUSAGE_NORMAL:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n");
+ break;
+ case SVGA3D_DECLUSAGE_PSIZE:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n");
+ break;
+ case SVGA3D_DECLUSAGE_TEXCOORD:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n");
+ break;
+ case SVGA3D_DECLUSAGE_TANGENT:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n");
+ break;
+ case SVGA3D_DECLUSAGE_BINORMAL:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n");
+ break;
+ case SVGA3D_DECLUSAGE_TESSFACTOR:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n");
+ break;
+ case SVGA3D_DECLUSAGE_POSITIONT:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n");
+ break;
+ case SVGA3D_DECLUSAGE_COLOR:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n");
+ break;
+ case SVGA3D_DECLUSAGE_FOG:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n");
+ break;
+ case SVGA3D_DECLUSAGE_DEPTH:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n");
+ break;
+ case SVGA3D_DECLUSAGE_SAMPLE:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n");
+ break;
+ case SVGA3D_DECLUSAGE_MAX:
+ _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage);
+ break;
+ }
+ _debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex);
+ _debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId);
+ _debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset);
+ _debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride);
+ _debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first);
+ _debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last);
+}
+
+static void
+dump_SVGA3dTextureState(const SVGA3dTextureState *cmd)
+{
+ _debug_printf("\t\t.stage = %u\n", (*cmd).stage);
+ switch((*cmd).name) {
+ case SVGA3D_TS_INVALID:
+ _debug_printf("\t\t.name = SVGA3D_TS_INVALID\n");
+ break;
+ case SVGA3D_TS_BIND_TEXTURE:
+ _debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n");
+ break;
+ case SVGA3D_TS_COLOROP:
+ _debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n");
+ break;
+ case SVGA3D_TS_COLORARG1:
+ _debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n");
+ break;
+ case SVGA3D_TS_COLORARG2:
+ _debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n");
+ break;
+ case SVGA3D_TS_ALPHAOP:
+ _debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n");
+ break;
+ case SVGA3D_TS_ALPHAARG1:
+ _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n");
+ break;
+ case SVGA3D_TS_ALPHAARG2:
+ _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n");
+ break;
+ case SVGA3D_TS_ADDRESSU:
+ _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n");
+ break;
+ case SVGA3D_TS_ADDRESSV:
+ _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n");
+ break;
+ case SVGA3D_TS_MIPFILTER:
+ _debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n");
+ break;
+ case SVGA3D_TS_MAGFILTER:
+ _debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n");
+ break;
+ case SVGA3D_TS_MINFILTER:
+ _debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n");
+ break;
+ case SVGA3D_TS_BORDERCOLOR:
+ _debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n");
+ break;
+ case SVGA3D_TS_TEXCOORDINDEX:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n");
+ break;
+ case SVGA3D_TS_TEXTURETRANSFORMFLAGS:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n");
+ break;
+ case SVGA3D_TS_TEXCOORDGEN:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n");
+ break;
+ case SVGA3D_TS_BUMPENVMAT00:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n");
+ break;
+ case SVGA3D_TS_BUMPENVMAT01:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n");
+ break;
+ case SVGA3D_TS_BUMPENVMAT10:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n");
+ break;
+ case SVGA3D_TS_BUMPENVMAT11:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n");
+ break;
+ case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n");
+ break;
+ case SVGA3D_TS_TEXTURE_LOD_BIAS:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n");
+ break;
+ case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL:
+ _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n");
+ break;
+ case SVGA3D_TS_ADDRESSW:
+ _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n");
+ break;
+ case SVGA3D_TS_GAMMA:
+ _debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n");
+ break;
+ case SVGA3D_TS_BUMPENVLSCALE:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n");
+ break;
+ case SVGA3D_TS_BUMPENVLOFFSET:
+ _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n");
+ break;
+ case SVGA3D_TS_COLORARG0:
+ _debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n");
+ break;
+ case SVGA3D_TS_ALPHAARG0:
+ _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n");
+ break;
+ case SVGA3D_TS_MAX:
+ _debug_printf("\t\t.name = SVGA3D_TS_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.name = %i\n", (*cmd).name);
+ break;
+ }
+ _debug_printf("\t\t.value = %u\n", (*cmd).value);
+ _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+static void
+dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd)
+{
+ _debug_printf("\t\t.x = %u\n", (*cmd).x);
+ _debug_printf("\t\t.y = %u\n", (*cmd).y);
+ _debug_printf("\t\t.z = %u\n", (*cmd).z);
+ _debug_printf("\t\t.w = %u\n", (*cmd).w);
+ _debug_printf("\t\t.h = %u\n", (*cmd).h);
+ _debug_printf("\t\t.d = %u\n", (*cmd).d);
+ _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+ _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+ _debug_printf("\t\t.srcz = %u\n", (*cmd).srcz);
+}
+
+static void
+dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.index = %u\n", (*cmd).index);
+ _debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]);
+ _debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]);
+ _debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]);
+ _debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]);
+}
+
+static void
+dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_QUERYTYPE_OCCLUSION:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+ break;
+ case SVGA3D_QUERYTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+ _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+ _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+}
+
+static void
+dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_RT_DEPTH:
+ _debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n");
+ break;
+ case SVGA3D_RT_STENCIL:
+ _debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0);
+ break;
+ }
+ _debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid);
+ _debug_printf("\t\t.target.face = %u\n", (*cmd).target.face);
+ _debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap);
+}
+
+static void
+dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd)
+{
+ _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+ _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+ _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+ _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+ _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+ _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+}
+
+static void
+dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).face) {
+ case SVGA3D_FACE_INVALID:
+ _debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n");
+ break;
+ case SVGA3D_FACE_NONE:
+ _debug_printf("\t\t.face = SVGA3D_FACE_NONE\n");
+ break;
+ case SVGA3D_FACE_FRONT:
+ _debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n");
+ break;
+ case SVGA3D_FACE_BACK:
+ _debug_printf("\t\t.face = SVGA3D_FACE_BACK\n");
+ break;
+ case SVGA3D_FACE_FRONT_BACK:
+ _debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n");
+ break;
+ case SVGA3D_FACE_MAX:
+ _debug_printf("\t\t.face = SVGA3D_FACE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.face = %i\n", (*cmd).face);
+ break;
+ }
+ _debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]);
+ _debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]);
+ _debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]);
+ _debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]);
+ _debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]);
+ _debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]);
+ _debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]);
+ _debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]);
+ _debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]);
+ _debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]);
+ _debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]);
+ _debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]);
+ _debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]);
+ _debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]);
+ _debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]);
+ _debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]);
+ _debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess);
+}
+
+static void
+dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.index = %u\n", (*cmd).index);
+ switch((*cmd).data.type) {
+ case SVGA3D_LIGHTTYPE_INVALID:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n");
+ break;
+ case SVGA3D_LIGHTTYPE_POINT:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n");
+ break;
+ case SVGA3D_LIGHTTYPE_SPOT1:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n");
+ break;
+ case SVGA3D_LIGHTTYPE_SPOT2:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n");
+ break;
+ case SVGA3D_LIGHTTYPE_DIRECTIONAL:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n");
+ break;
+ case SVGA3D_LIGHTTYPE_MAX:
+ _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.data.type = %i\n", (*cmd).data.type);
+ break;
+ }
+ _debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace);
+ _debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]);
+ _debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]);
+ _debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]);
+ _debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]);
+ _debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]);
+ _debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]);
+ _debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]);
+ _debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]);
+ _debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]);
+ _debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]);
+ _debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]);
+ _debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]);
+ _debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]);
+ _debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]);
+ _debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]);
+ _debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]);
+ _debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]);
+ _debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]);
+ _debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]);
+ _debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]);
+ _debug_printf("\t\t.data.range = %f\n", (*cmd).data.range);
+ _debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff);
+ _debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0);
+ _debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1);
+ _debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2);
+ _debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta);
+ _debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi);
+}
+
+static void
+dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+ _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+ _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+ _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+}
+
+static void
+dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+ _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+ _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+ _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+}
+
+static void
+dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd)
+{
+ _debug_printf("\t\t.x = %u\n", (*cmd).x);
+ _debug_printf("\t\t.y = %u\n", (*cmd).y);
+ _debug_printf("\t\t.w = %u\n", (*cmd).w);
+ _debug_printf("\t\t.h = %u\n", (*cmd).h);
+ _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+ _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+}
+
+static void
+dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_SHADERTYPE_COMPILED_DX8:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+ break;
+ case SVGA3D_SHADERTYPE_VS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+ break;
+ case SVGA3D_SHADERTYPE_PS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+ break;
+ case SVGA3D_SHADERTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+ _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+}
+
+static void
+dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_QUERYTYPE_OCCLUSION:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+ break;
+ case SVGA3D_QUERYTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+ _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+ _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+}
+
+static void
+dump_SVGA3dSize(const SVGA3dSize *cmd)
+{
+ _debug_printf("\t\t.width = %u\n", (*cmd).width);
+ _debug_printf("\t\t.height = %u\n", (*cmd).height);
+ _debug_printf("\t\t.depth = %u\n", (*cmd).depth);
+}
+
+static void
+dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd)
+{
+ _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+}
+
+static void
+dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dRect(const SVGA3dRect *cmd)
+{
+ _debug_printf("\t\t.x = %u\n", (*cmd).x);
+ _debug_printf("\t\t.y = %u\n", (*cmd).y);
+ _debug_printf("\t\t.w = %u\n", (*cmd).w);
+ _debug_printf("\t\t.h = %u\n", (*cmd).h);
+}
+
+static void
+dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_QUERYTYPE_OCCLUSION:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+ break;
+ case SVGA3D_QUERYTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dRenderState(const SVGA3dRenderState *cmd)
+{
+ switch((*cmd).state) {
+ case SVGA3D_RS_INVALID:
+ _debug_printf("\t\t.state = SVGA3D_RS_INVALID\n");
+ break;
+ case SVGA3D_RS_ZENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n");
+ break;
+ case SVGA3D_RS_ZWRITEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n");
+ break;
+ case SVGA3D_RS_ALPHATESTENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n");
+ break;
+ case SVGA3D_RS_DITHERENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n");
+ break;
+ case SVGA3D_RS_BLENDENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n");
+ break;
+ case SVGA3D_RS_FOGENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n");
+ break;
+ case SVGA3D_RS_SPECULARENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n");
+ break;
+ case SVGA3D_RS_STENCILENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n");
+ break;
+ case SVGA3D_RS_LIGHTINGENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n");
+ break;
+ case SVGA3D_RS_NORMALIZENORMALS:
+ _debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n");
+ break;
+ case SVGA3D_RS_POINTSPRITEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n");
+ break;
+ case SVGA3D_RS_POINTSCALEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n");
+ break;
+ case SVGA3D_RS_STENCILREF:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n");
+ break;
+ case SVGA3D_RS_STENCILMASK:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n");
+ break;
+ case SVGA3D_RS_STENCILWRITEMASK:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n");
+ break;
+ case SVGA3D_RS_FOGSTART:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n");
+ break;
+ case SVGA3D_RS_FOGEND:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n");
+ break;
+ case SVGA3D_RS_FOGDENSITY:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n");
+ break;
+ case SVGA3D_RS_POINTSIZE:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n");
+ break;
+ case SVGA3D_RS_POINTSIZEMIN:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n");
+ break;
+ case SVGA3D_RS_POINTSIZEMAX:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n");
+ break;
+ case SVGA3D_RS_POINTSCALE_A:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n");
+ break;
+ case SVGA3D_RS_POINTSCALE_B:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n");
+ break;
+ case SVGA3D_RS_POINTSCALE_C:
+ _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n");
+ break;
+ case SVGA3D_RS_FOGCOLOR:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n");
+ break;
+ case SVGA3D_RS_AMBIENT:
+ _debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n");
+ break;
+ case SVGA3D_RS_CLIPPLANEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n");
+ break;
+ case SVGA3D_RS_FOGMODE:
+ _debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n");
+ break;
+ case SVGA3D_RS_FILLMODE:
+ _debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n");
+ break;
+ case SVGA3D_RS_SHADEMODE:
+ _debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n");
+ break;
+ case SVGA3D_RS_LINEPATTERN:
+ _debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n");
+ break;
+ case SVGA3D_RS_SRCBLEND:
+ _debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n");
+ break;
+ case SVGA3D_RS_DSTBLEND:
+ _debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n");
+ break;
+ case SVGA3D_RS_BLENDEQUATION:
+ _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n");
+ break;
+ case SVGA3D_RS_CULLMODE:
+ _debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n");
+ break;
+ case SVGA3D_RS_ZFUNC:
+ _debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n");
+ break;
+ case SVGA3D_RS_ALPHAFUNC:
+ _debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n");
+ break;
+ case SVGA3D_RS_STENCILFUNC:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n");
+ break;
+ case SVGA3D_RS_STENCILFAIL:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n");
+ break;
+ case SVGA3D_RS_STENCILZFAIL:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n");
+ break;
+ case SVGA3D_RS_STENCILPASS:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n");
+ break;
+ case SVGA3D_RS_ALPHAREF:
+ _debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n");
+ break;
+ case SVGA3D_RS_FRONTWINDING:
+ _debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n");
+ break;
+ case SVGA3D_RS_COORDINATETYPE:
+ _debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n");
+ break;
+ case SVGA3D_RS_ZBIAS:
+ _debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n");
+ break;
+ case SVGA3D_RS_RANGEFOGENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n");
+ break;
+ case SVGA3D_RS_COLORWRITEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n");
+ break;
+ case SVGA3D_RS_VERTEXMATERIALENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n");
+ break;
+ case SVGA3D_RS_DIFFUSEMATERIALSOURCE:
+ _debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n");
+ break;
+ case SVGA3D_RS_SPECULARMATERIALSOURCE:
+ _debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n");
+ break;
+ case SVGA3D_RS_AMBIENTMATERIALSOURCE:
+ _debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n");
+ break;
+ case SVGA3D_RS_EMISSIVEMATERIALSOURCE:
+ _debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n");
+ break;
+ case SVGA3D_RS_TEXTUREFACTOR:
+ _debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n");
+ break;
+ case SVGA3D_RS_LOCALVIEWER:
+ _debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n");
+ break;
+ case SVGA3D_RS_SCISSORTESTENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n");
+ break;
+ case SVGA3D_RS_BLENDCOLOR:
+ _debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n");
+ break;
+ case SVGA3D_RS_STENCILENABLE2SIDED:
+ _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n");
+ break;
+ case SVGA3D_RS_CCWSTENCILFUNC:
+ _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n");
+ break;
+ case SVGA3D_RS_CCWSTENCILFAIL:
+ _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n");
+ break;
+ case SVGA3D_RS_CCWSTENCILZFAIL:
+ _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n");
+ break;
+ case SVGA3D_RS_CCWSTENCILPASS:
+ _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n");
+ break;
+ case SVGA3D_RS_VERTEXBLEND:
+ _debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n");
+ break;
+ case SVGA3D_RS_SLOPESCALEDEPTHBIAS:
+ _debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n");
+ break;
+ case SVGA3D_RS_DEPTHBIAS:
+ _debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n");
+ break;
+ case SVGA3D_RS_OUTPUTGAMMA:
+ _debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n");
+ break;
+ case SVGA3D_RS_ZVISIBLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n");
+ break;
+ case SVGA3D_RS_LASTPIXEL:
+ _debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n");
+ break;
+ case SVGA3D_RS_CLIPPING:
+ _debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n");
+ break;
+ case SVGA3D_RS_WRAP0:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n");
+ break;
+ case SVGA3D_RS_WRAP1:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n");
+ break;
+ case SVGA3D_RS_WRAP2:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n");
+ break;
+ case SVGA3D_RS_WRAP3:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n");
+ break;
+ case SVGA3D_RS_WRAP4:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n");
+ break;
+ case SVGA3D_RS_WRAP5:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n");
+ break;
+ case SVGA3D_RS_WRAP6:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n");
+ break;
+ case SVGA3D_RS_WRAP7:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n");
+ break;
+ case SVGA3D_RS_WRAP8:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n");
+ break;
+ case SVGA3D_RS_WRAP9:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n");
+ break;
+ case SVGA3D_RS_WRAP10:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n");
+ break;
+ case SVGA3D_RS_WRAP11:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n");
+ break;
+ case SVGA3D_RS_WRAP12:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n");
+ break;
+ case SVGA3D_RS_WRAP13:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n");
+ break;
+ case SVGA3D_RS_WRAP14:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n");
+ break;
+ case SVGA3D_RS_WRAP15:
+ _debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n");
+ break;
+ case SVGA3D_RS_MULTISAMPLEANTIALIAS:
+ _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n");
+ break;
+ case SVGA3D_RS_MULTISAMPLEMASK:
+ _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n");
+ break;
+ case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n");
+ break;
+ case SVGA3D_RS_TWEENFACTOR:
+ _debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n");
+ break;
+ case SVGA3D_RS_ANTIALIASEDLINEENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n");
+ break;
+ case SVGA3D_RS_COLORWRITEENABLE1:
+ _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n");
+ break;
+ case SVGA3D_RS_COLORWRITEENABLE2:
+ _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n");
+ break;
+ case SVGA3D_RS_COLORWRITEENABLE3:
+ _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n");
+ break;
+ case SVGA3D_RS_SEPARATEALPHABLENDENABLE:
+ _debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n");
+ break;
+ case SVGA3D_RS_SRCBLENDALPHA:
+ _debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n");
+ break;
+ case SVGA3D_RS_DSTBLENDALPHA:
+ _debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n");
+ break;
+ case SVGA3D_RS_BLENDEQUATIONALPHA:
+ _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n");
+ break;
+ case SVGA3D_RS_MAX:
+ _debug_printf("\t\t.state = SVGA3D_RS_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.state = %i\n", (*cmd).state);
+ break;
+ }
+ _debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue);
+ _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+static void
+dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd)
+{
+ _debug_printf("\t\t.value = %u\n", (*cmd).value);
+ _debug_printf("\t\t.count = %u\n", (*cmd).count);
+ _debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData);
+ _debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData);
+}
+
+static void
+dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+ switch((*cmd).type) {
+ case SVGA3D_SHADERTYPE_COMPILED_DX8:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+ break;
+ case SVGA3D_SHADERTYPE_VS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+ break;
+ case SVGA3D_SHADERTYPE_PS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+ break;
+ case SVGA3D_SHADERTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.reg = %u\n", (*cmd).reg);
+ switch((*cmd).type) {
+ case SVGA3D_SHADERTYPE_COMPILED_DX8:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+ break;
+ case SVGA3D_SHADERTYPE_VS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+ break;
+ case SVGA3D_SHADERTYPE_PS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+ break;
+ case SVGA3D_SHADERTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+ switch((*cmd).ctype) {
+ case SVGA3D_CONST_TYPE_FLOAT:
+ _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n");
+ _debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]);
+ _debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]);
+ _debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]);
+ _debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]);
+ break;
+ case SVGA3D_CONST_TYPE_INT:
+ _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n");
+ _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+ _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+ _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+ _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+ break;
+ case SVGA3D_CONST_TYPE_BOOL:
+ _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n");
+ _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+ _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+ _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+ _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+ break;
+ default:
+ _debug_printf("\t\t.ctype = %i\n", (*cmd).ctype);
+ _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+ _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+ _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+ _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min);
+ _debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max);
+}
+
+static void
+dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls);
+ _debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges);
+}
+
+static void
+dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.index = %u\n", (*cmd).index);
+ _debug_printf("\t\t.enabled = %u\n", (*cmd).enabled);
+}
+
+static void
+dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd)
+{
+ switch((*cmd).primType) {
+ case SVGA3D_PRIMITIVE_INVALID:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n");
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLELIST:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n");
+ break;
+ case SVGA3D_PRIMITIVE_POINTLIST:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n");
+ break;
+ case SVGA3D_PRIMITIVE_LINELIST:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n");
+ break;
+ case SVGA3D_PRIMITIVE_LINESTRIP:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n");
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n");
+ break;
+ case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n");
+ break;
+ case SVGA3D_PRIMITIVE_MAX:
+ _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.primType = %i\n", (*cmd).primType);
+ break;
+ }
+ _debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount);
+ _debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId);
+ _debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset);
+ _debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride);
+ _debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth);
+ _debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias);
+}
+
+static void
+dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd)
+{
+ _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+}
+
+static void
+dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd)
+{
+ _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+ _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+ _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+ _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+ _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+ _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+ _debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x);
+ _debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y);
+ _debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z);
+ _debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w);
+ _debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h);
+ _debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d);
+ _debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x);
+ _debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y);
+ _debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z);
+ _debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w);
+ _debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h);
+ _debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d);
+ switch((*cmd).mode) {
+ case SVGA3D_STRETCH_BLT_POINT:
+ _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n");
+ break;
+ case SVGA3D_STRETCH_BLT_LINEAR:
+ _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n");
+ break;
+ case SVGA3D_STRETCH_BLT_MAX:
+ _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.mode = %i\n", (*cmd).mode);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd)
+{
+ _debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId);
+ _debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset);
+ _debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch);
+ _debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid);
+ _debug_printf("\t\t.host.face = %u\n", (*cmd).host.face);
+ _debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap);
+ switch((*cmd).transfer) {
+ case SVGA3D_WRITE_HOST_VRAM:
+ _debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n");
+ break;
+ case SVGA3D_READ_HOST_VRAM:
+ _debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n");
+ break;
+ default:
+ _debug_printf("\t\t.transfer = %i\n", (*cmd).transfer);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd)
+{
+ _debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize);
+ _debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset);
+ _debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard);
+ _debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized);
+}
+
+static void
+dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).type) {
+ case SVGA3D_TRANSFORM_INVALID:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n");
+ break;
+ case SVGA3D_TRANSFORM_WORLD:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n");
+ break;
+ case SVGA3D_TRANSFORM_VIEW:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n");
+ break;
+ case SVGA3D_TRANSFORM_PROJECTION:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE0:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE1:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE2:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE3:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE4:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE5:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE6:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n");
+ break;
+ case SVGA3D_TRANSFORM_TEXTURE7:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n");
+ break;
+ case SVGA3D_TRANSFORM_WORLD1:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n");
+ break;
+ case SVGA3D_TRANSFORM_WORLD2:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n");
+ break;
+ case SVGA3D_TRANSFORM_WORLD3:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n");
+ break;
+ case SVGA3D_TRANSFORM_MAX:
+ _debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+ _debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]);
+ _debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]);
+ _debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]);
+ _debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]);
+ _debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]);
+ _debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]);
+ _debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]);
+ _debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]);
+ _debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]);
+ _debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]);
+ _debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]);
+ _debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]);
+ _debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]);
+ _debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]);
+ _debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]);
+ _debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]);
+}
+
+static void
+dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+ switch((*cmd).type) {
+ case SVGA3D_SHADERTYPE_COMPILED_DX8:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+ break;
+ case SVGA3D_SHADERTYPE_VS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+ break;
+ case SVGA3D_SHADERTYPE_PS:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+ break;
+ case SVGA3D_SHADERTYPE_MAX:
+ _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.type = %i\n", (*cmd).type);
+ break;
+ }
+}
+
+static void
+dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd)
+{
+ _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+ switch((*cmd).clearFlag) {
+ case SVGA3D_CLEAR_COLOR:
+ _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n");
+ break;
+ case SVGA3D_CLEAR_DEPTH:
+ _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n");
+ break;
+ case SVGA3D_CLEAR_STENCIL:
+ _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n");
+ break;
+ default:
+ _debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag);
+ break;
+ }
+ _debug_printf("\t\t.color = %u\n", (*cmd).color);
+ _debug_printf("\t\t.depth = %f\n", (*cmd).depth);
+ _debug_printf("\t\t.stencil = %u\n", (*cmd).stencil);
+}
+
+static void
+dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
+{
+ _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+ switch((*cmd).surfaceFlags) {
+ case SVGA3D_SURFACE_CUBEMAP:
+ _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n");
+ break;
+ case SVGA3D_SURFACE_HINT_STATIC:
+ _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n");
+ break;
+ case SVGA3D_SURFACE_HINT_DYNAMIC:
+ _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n");
+ break;
+ case SVGA3D_SURFACE_HINT_INDEXBUFFER:
+ _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n");
+ break;
+ case SVGA3D_SURFACE_HINT_VERTEXBUFFER:
+ _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n");
+ break;
+ default:
+ _debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags);
+ break;
+ }
+ switch((*cmd).format) {
+ case SVGA3D_FORMAT_INVALID:
+ _debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n");
+ break;
+ case SVGA3D_X8R8G8B8:
+ _debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n");
+ break;
+ case SVGA3D_A8R8G8B8:
+ _debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n");
+ break;
+ case SVGA3D_R5G6B5:
+ _debug_printf("\t\t.format = SVGA3D_R5G6B5\n");
+ break;
+ case SVGA3D_X1R5G5B5:
+ _debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n");
+ break;
+ case SVGA3D_A1R5G5B5:
+ _debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n");
+ break;
+ case SVGA3D_A4R4G4B4:
+ _debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n");
+ break;
+ case SVGA3D_Z_D32:
+ _debug_printf("\t\t.format = SVGA3D_Z_D32\n");
+ break;
+ case SVGA3D_Z_D16:
+ _debug_printf("\t\t.format = SVGA3D_Z_D16\n");
+ break;
+ case SVGA3D_Z_D24S8:
+ _debug_printf("\t\t.format = SVGA3D_Z_D24S8\n");
+ break;
+ case SVGA3D_Z_D15S1:
+ _debug_printf("\t\t.format = SVGA3D_Z_D15S1\n");
+ break;
+ case SVGA3D_LUMINANCE8:
+ _debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n");
+ break;
+ case SVGA3D_LUMINANCE4_ALPHA4:
+ _debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n");
+ break;
+ case SVGA3D_LUMINANCE16:
+ _debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n");
+ break;
+ case SVGA3D_LUMINANCE8_ALPHA8:
+ _debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n");
+ break;
+ case SVGA3D_DXT1:
+ _debug_printf("\t\t.format = SVGA3D_DXT1\n");
+ break;
+ case SVGA3D_DXT2:
+ _debug_printf("\t\t.format = SVGA3D_DXT2\n");
+ break;
+ case SVGA3D_DXT3:
+ _debug_printf("\t\t.format = SVGA3D_DXT3\n");
+ break;
+ case SVGA3D_DXT4:
+ _debug_printf("\t\t.format = SVGA3D_DXT4\n");
+ break;
+ case SVGA3D_DXT5:
+ _debug_printf("\t\t.format = SVGA3D_DXT5\n");
+ break;
+ case SVGA3D_BUMPU8V8:
+ _debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n");
+ break;
+ case SVGA3D_BUMPL6V5U5:
+ _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
+ break;
+ case SVGA3D_BUMPX8L8V8U8:
+ _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
+ break;
+ case SVGA3D_BUMPL8V8U8:
+ _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
+ break;
+ case SVGA3D_ARGB_S10E5:
+ _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
+ break;
+ case SVGA3D_ARGB_S23E8:
+ _debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n");
+ break;
+ case SVGA3D_A2R10G10B10:
+ _debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n");
+ break;
+ case SVGA3D_V8U8:
+ _debug_printf("\t\t.format = SVGA3D_V8U8\n");
+ break;
+ case SVGA3D_Q8W8V8U8:
+ _debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n");
+ break;
+ case SVGA3D_CxV8U8:
+ _debug_printf("\t\t.format = SVGA3D_CxV8U8\n");
+ break;
+ case SVGA3D_X8L8V8U8:
+ _debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n");
+ break;
+ case SVGA3D_A2W10V10U10:
+ _debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n");
+ break;
+ case SVGA3D_ALPHA8:
+ _debug_printf("\t\t.format = SVGA3D_ALPHA8\n");
+ break;
+ case SVGA3D_R_S10E5:
+ _debug_printf("\t\t.format = SVGA3D_R_S10E5\n");
+ break;
+ case SVGA3D_R_S23E8:
+ _debug_printf("\t\t.format = SVGA3D_R_S23E8\n");
+ break;
+ case SVGA3D_RG_S10E5:
+ _debug_printf("\t\t.format = SVGA3D_RG_S10E5\n");
+ break;
+ case SVGA3D_RG_S23E8:
+ _debug_printf("\t\t.format = SVGA3D_RG_S23E8\n");
+ break;
+ case SVGA3D_BUFFER:
+ _debug_printf("\t\t.format = SVGA3D_BUFFER\n");
+ break;
+ case SVGA3D_Z_D24X8:
+ _debug_printf("\t\t.format = SVGA3D_Z_D24X8\n");
+ break;
+ case SVGA3D_FORMAT_MAX:
+ _debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n");
+ break;
+ default:
+ _debug_printf("\t\t.format = %i\n", (*cmd).format);
+ break;
+ }
+ _debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels);
+ _debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels);
+ _debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels);
+ _debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels);
+ _debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels);
+ _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels);
+}
+
+static void
+dump_SVGASignedRect(const SVGASignedRect *cmd)
+{
+ _debug_printf("\t\t.left = %i\n", (*cmd).left);
+ _debug_printf("\t\t.top = %i\n", (*cmd).top);
+ _debug_printf("\t\t.right = %i\n", (*cmd).right);
+ _debug_printf("\t\t.bottom = %i\n", (*cmd).bottom);
+}
+
+static void
+dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd)
+{
+ _debug_printf("\t\t.srcImage.sid = %u\n", (*cmd).srcImage.sid);
+ _debug_printf("\t\t.srcImage.face = %u\n", (*cmd).srcImage.face);
+ _debug_printf("\t\t.srcImage.mipmap = %u\n", (*cmd).srcImage.mipmap);
+ _debug_printf("\t\t.srcRect.left = %i\n", (*cmd).srcRect.left);
+ _debug_printf("\t\t.srcRect.top = %i\n", (*cmd).srcRect.top);
+ _debug_printf("\t\t.srcRect.right = %i\n", (*cmd).srcRect.right);
+ _debug_printf("\t\t.srcRect.bottom = %i\n", (*cmd).srcRect.bottom);
+ _debug_printf("\t\t.destScreenId = %u\n", (*cmd).destScreenId);
+ _debug_printf("\t\t.destRect.left = %i\n", (*cmd).destRect.left);
+ _debug_printf("\t\t.destRect.top = %i\n", (*cmd).destRect.top);
+ _debug_printf("\t\t.destRect.right = %i\n", (*cmd).destRect.right);
+ _debug_printf("\t\t.destRect.bottom = %i\n", (*cmd).destRect.bottom);
+}
+
+
+void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+ const uint8_t *body = (const uint8_t *)data;
+ const uint8_t *next = body + size;
+
+ switch(cmd_id) {
+ case SVGA_3D_CMD_SURFACE_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+ {
+ const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+ dump_SVGA3dCmdDefineSurface(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dSize) <= next) {
+ dump_SVGA3dSize((const SVGA3dSize *)body);
+ body += sizeof(SVGA3dSize);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+ {
+ const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+ dump_SVGA3dCmdDestroySurface(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_COPY:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+ {
+ const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+ dump_SVGA3dCmdSurfaceCopy(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyBox) <= next) {
+ dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+ body += sizeof(SVGA3dCopyBox);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+ {
+ const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+ dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SURFACE_DMA:
+ _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+ {
+ const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+ dump_SVGA3dCmdSurfaceDMA(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyBox) <= next) {
+ dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+ body += sizeof(SVGA3dCopyBox);
+ }
+ while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+ dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+ body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_CONTEXT_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+ {
+ const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+ dump_SVGA3dCmdDefineContext(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_CONTEXT_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+ {
+ const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+ dump_SVGA3dCmdDestroyContext(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETTRANSFORM:
+ _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+ {
+ const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+ dump_SVGA3dCmdSetTransform(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETZRANGE:
+ _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+ {
+ const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+ dump_SVGA3dCmdSetZRange(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETRENDERSTATE:
+ _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+ {
+ const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+ dump_SVGA3dCmdSetRenderState(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dRenderState) <= next) {
+ dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+ body += sizeof(SVGA3dRenderState);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETRENDERTARGET:
+ _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+ {
+ const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+ dump_SVGA3dCmdSetRenderTarget(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETTEXTURESTATE:
+ _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+ {
+ const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+ dump_SVGA3dCmdSetTextureState(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dTextureState) <= next) {
+ dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+ body += sizeof(SVGA3dTextureState);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETMATERIAL:
+ _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+ {
+ const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+ dump_SVGA3dCmdSetMaterial(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETLIGHTDATA:
+ _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+ {
+ const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+ dump_SVGA3dCmdSetLightData(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETLIGHTENABLED:
+ _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+ {
+ const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+ dump_SVGA3dCmdSetLightEnabled(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETVIEWPORT:
+ _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+ {
+ const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+ dump_SVGA3dCmdSetViewport(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SETCLIPPLANE:
+ _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+ {
+ const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+ dump_SVGA3dCmdSetClipPlane(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_CLEAR:
+ _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+ {
+ const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+ dump_SVGA3dCmdClear(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dRect) <= next) {
+ dump_SVGA3dRect((const SVGA3dRect *)body);
+ body += sizeof(SVGA3dRect);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_PRESENT:
+ _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+ {
+ const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+ dump_SVGA3dCmdPresent(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGA3dCopyRect) <= next) {
+ dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+ body += sizeof(SVGA3dCopyRect);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SHADER_DEFINE:
+ _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+ {
+ const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+ dump_SVGA3dCmdDefineShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ svga_shader_dump((const uint32_t *)body,
+ (unsigned)(next - body)/sizeof(uint32_t),
+ FALSE );
+ body = next;
+ }
+ break;
+ case SVGA_3D_CMD_SHADER_DESTROY:
+ _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+ {
+ const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+ dump_SVGA3dCmdDestroyShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SET_SHADER:
+ _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+ {
+ const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+ dump_SVGA3dCmdSetShader(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_SET_SHADER_CONST:
+ _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+ {
+ const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+ dump_SVGA3dCmdSetShaderConst(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_DRAW_PRIMITIVES:
+ _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+ {
+ const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+ unsigned i, j;
+ dump_SVGA3dCmdDrawPrimitives(cmd);
+ body = (const uint8_t *)&cmd[1];
+ for(i = 0; i < cmd->numVertexDecls; ++i) {
+ dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+ body += sizeof(SVGA3dVertexDecl);
+ }
+ for(j = 0; j < cmd->numRanges; ++j) {
+ dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+ body += sizeof(SVGA3dPrimitiveRange);
+ }
+ while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+ dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+ body += sizeof(SVGA3dVertexDivisor);
+ }
+ }
+ break;
+ case SVGA_3D_CMD_SETSCISSORRECT:
+ _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+ {
+ const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+ dump_SVGA3dCmdSetScissorRect(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_BEGIN_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+ {
+ const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+ dump_SVGA3dCmdBeginQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_END_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+ {
+ const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+ dump_SVGA3dCmdEndQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_WAIT_FOR_QUERY:
+ _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+ {
+ const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+ dump_SVGA3dCmdWaitForQuery(cmd);
+ body = (const uint8_t *)&cmd[1];
+ }
+ break;
+ case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
+ _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
+ {
+ const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
+ dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
+ body = (const uint8_t *)&cmd[1];
+ while(body + sizeof(SVGASignedRect) <= next) {
+ dump_SVGASignedRect((const SVGASignedRect *)body);
+ body += sizeof(SVGASignedRect);
+ }
+ }
+ break;
+ default:
+ _debug_printf("\t0x%08x\n", cmd_id);
+ break;
+ }
+
+ while(body + sizeof(uint32_t) <= next) {
+ _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+ body += sizeof(uint32_t);
+ }
+ while(body + sizeof(uint32_t) <= next)
+ _debug_printf("\t\t0x%02x\n", *body++);
+}
+
+
+void
+svga_dump_commands(const void *commands, uint32_t size)
+{
+ const uint8_t *next = commands;
+ const uint8_t *last = next + size;
+
+ assert(size % sizeof(uint32_t) == 0);
+
+ while(next < last) {
+ const uint32_t cmd_id = *(const uint32_t *)next;
+
+ if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
+ const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+ const uint8_t *body = (const uint8_t *)&header[1];
+
+ next = body + header->size;
+ if(next > last)
+ break;
+
+ svga_dump_command(cmd_id, body, header->size);
+ }
+ else if(cmd_id == SVGA_CMD_FENCE) {
+ _debug_printf("\tSVGA_CMD_FENCE\n");
+ _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+ next += 2*sizeof(uint32_t);
+ }
+ else {
+ _debug_printf("\t0x%08x\n", cmd_id);
+ next += sizeof(uint32_t);
+ }
+ }
+}
+
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
new file mode 100644
index 00000000000..ca0154361cc
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -0,0 +1,37 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DUMP_H_
+#define SVGA_DUMP_H_
+
+#include "pipe/p_compiler.h"
+
+void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size);
+
+void
+svga_dump_commands(const void *commands, uint32_t size);
+
+#endif /* SVGA_DUMP_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
new file mode 100755
index 00000000000..0bc0b3ae317
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -0,0 +1,340 @@
+#!/usr/bin/env python
+'''
+Generates dumper for the SVGA 3D command stream using pygccxml.
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+
+copyright = '''
+/**********************************************************
+ * Copyright 2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+ '''
+
+import os
+import sys
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+
+ def __init__(self, instance = '', decl = None):
+ decl_visitor.decl_visitor_t.__init__(self)
+ self._instance = instance
+ self.decl = decl
+
+ def clone(self):
+ return decl_dumper_t(self._instance, self.decl)
+
+ def visit_class(self):
+ class_ = self.decl
+ assert self.decl.class_type in ('struct', 'union')
+
+ for variable in class_.variables():
+ if variable.name != '':
+ #print 'variable = %r' % variable.name
+ dump_type(self._instance + '.' + variable.name, variable.type)
+
+ def visit_enumeration(self):
+ if enums:
+ print ' switch(%s) {' % ("(*cmd)" + self._instance,)
+ for name, value in self.decl.values:
+ print ' case %s:' % (name,)
+ print ' _debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name)
+ print ' break;'
+ print ' default:'
+ print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+ print ' break;'
+ print ' }'
+ else:
+ print ' _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+
+
+def dump_decl(instance, decl):
+ dumper = decl_dumper_t(instance, decl)
+ algorithm.apply_visitor(dumper, decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+
+ def __init__(self, instance, type_):
+ type_visitor.type_visitor_t.__init__(self)
+ self.instance = instance
+ self.type = type_
+
+ def clone(self):
+ return type_dumper_t(self.instance, self.type)
+
+ def visit_char(self):
+ self.print_instance('%i')
+
+ def visit_unsigned_char(self):
+ self.print_instance('%u')
+
+ def visit_signed_char(self):
+ self.print_instance('%i')
+
+ def visit_wchar(self):
+ self.print_instance('%i')
+
+ def visit_short_int(self):
+ self.print_instance('%i')
+
+ def visit_short_unsigned_int(self):
+ self.print_instance('%u')
+
+ def visit_bool(self):
+ self.print_instance('%i')
+
+ def visit_int(self):
+ self.print_instance('%i')
+
+ def visit_unsigned_int(self):
+ self.print_instance('%u')
+
+ def visit_long_int(self):
+ self.print_instance('%li')
+
+ def visit_long_unsigned_int(self):
+ self.print_instance('%lu')
+
+ def visit_long_long_int(self):
+ self.print_instance('%lli')
+
+ def visit_long_long_unsigned_int(self):
+ self.print_instance('%llu')
+
+ def visit_float(self):
+ self.print_instance('%f')
+
+ def visit_double(self):
+ self.print_instance('%f')
+
+ def visit_array(self):
+ for i in range(type_traits.array_size(self.type)):
+ dump_type(self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+ def visit_pointer(self):
+ self.print_instance('%p')
+
+ def visit_declarated(self):
+ #print 'decl = %r' % self.type.decl_string
+ decl = type_traits.remove_declarated(self.type)
+ dump_decl(self.instance, decl)
+
+ def print_instance(self, format):
+ print ' _debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance)
+
+
+def dump_type(instance, type_):
+ type_ = type_traits.remove_alias(type_)
+ visitor = type_dumper_t(instance, type_)
+ algorithm.apply_visitor(visitor, type_)
+
+
+def dump_struct(decls, class_):
+ print 'static void'
+ print 'dump_%s(const %s *cmd)' % (class_.name, class_.name)
+ print '{'
+ dump_decl('', class_)
+ print '}'
+ print ''
+
+
+cmds = [
+ ('SVGA_3D_CMD_SURFACE_DEFINE', 'SVGA3dCmdDefineSurface', (), 'SVGA3dSize'),
+ ('SVGA_3D_CMD_SURFACE_DESTROY', 'SVGA3dCmdDestroySurface', (), None),
+ ('SVGA_3D_CMD_SURFACE_COPY', 'SVGA3dCmdSurfaceCopy', (), 'SVGA3dCopyBox'),
+ ('SVGA_3D_CMD_SURFACE_STRETCHBLT', 'SVGA3dCmdSurfaceStretchBlt', (), None),
+ ('SVGA_3D_CMD_SURFACE_DMA', 'SVGA3dCmdSurfaceDMA', (), 'SVGA3dCopyBox'),
+ ('SVGA_3D_CMD_CONTEXT_DEFINE', 'SVGA3dCmdDefineContext', (), None),
+ ('SVGA_3D_CMD_CONTEXT_DESTROY', 'SVGA3dCmdDestroyContext', (), None),
+ ('SVGA_3D_CMD_SETTRANSFORM', 'SVGA3dCmdSetTransform', (), None),
+ ('SVGA_3D_CMD_SETZRANGE', 'SVGA3dCmdSetZRange', (), None),
+ ('SVGA_3D_CMD_SETRENDERSTATE', 'SVGA3dCmdSetRenderState', (), 'SVGA3dRenderState'),
+ ('SVGA_3D_CMD_SETRENDERTARGET', 'SVGA3dCmdSetRenderTarget', (), None),
+ ('SVGA_3D_CMD_SETTEXTURESTATE', 'SVGA3dCmdSetTextureState', (), 'SVGA3dTextureState'),
+ ('SVGA_3D_CMD_SETMATERIAL', 'SVGA3dCmdSetMaterial', (), None),
+ ('SVGA_3D_CMD_SETLIGHTDATA', 'SVGA3dCmdSetLightData', (), None),
+ ('SVGA_3D_CMD_SETLIGHTENABLED', 'SVGA3dCmdSetLightEnabled', (), None),
+ ('SVGA_3D_CMD_SETVIEWPORT', 'SVGA3dCmdSetViewport', (), None),
+ ('SVGA_3D_CMD_SETCLIPPLANE', 'SVGA3dCmdSetClipPlane', (), None),
+ ('SVGA_3D_CMD_CLEAR', 'SVGA3dCmdClear', (), 'SVGA3dRect'),
+ ('SVGA_3D_CMD_PRESENT', 'SVGA3dCmdPresent', (), 'SVGA3dCopyRect'),
+ ('SVGA_3D_CMD_SHADER_DEFINE', 'SVGA3dCmdDefineShader', (), None),
+ ('SVGA_3D_CMD_SHADER_DESTROY', 'SVGA3dCmdDestroyShader', (), None),
+ ('SVGA_3D_CMD_SET_SHADER', 'SVGA3dCmdSetShader', (), None),
+ ('SVGA_3D_CMD_SET_SHADER_CONST', 'SVGA3dCmdSetShaderConst', (), None),
+ ('SVGA_3D_CMD_DRAW_PRIMITIVES', 'SVGA3dCmdDrawPrimitives', (('SVGA3dVertexDecl', 'numVertexDecls'), ('SVGA3dPrimitiveRange', 'numRanges')), 'SVGA3dVertexDivisor'),
+ ('SVGA_3D_CMD_SETSCISSORRECT', 'SVGA3dCmdSetScissorRect', (), None),
+ ('SVGA_3D_CMD_BEGIN_QUERY', 'SVGA3dCmdBeginQuery', (), None),
+ ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None),
+ ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None),
+ #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None),
+ ('SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN', 'SVGA3dCmdBlitSurfaceToScreen', (), 'SVGASignedRect'),
+]
+
+def dump_cmds():
+ print r'''
+void
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+ const uint8_t *body = (const uint8_t *)data;
+ const uint8_t *next = body + size;
+'''
+ print ' switch(cmd_id) {'
+ indexes = 'ijklmn'
+ for id, header, body, footer in cmds:
+ print ' case %s:' % id
+ print ' _debug_printf("\\t%s\\n");' % id
+ print ' {'
+ print ' const %s *cmd = (const %s *)body;' % (header, header)
+ if len(body):
+ print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+ print ' dump_%s(cmd);' % header
+ print ' body = (const uint8_t *)&cmd[1];'
+ for i in range(len(body)):
+ struct, count = body[i]
+ idx = indexes[i]
+ print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+ print ' dump_%s((const %s *)body);' % (struct, struct)
+ print ' body += sizeof(%s);' % struct
+ print ' }'
+ if footer is not None:
+ print ' while(body + sizeof(%s) <= next) {' % footer
+ print ' dump_%s((const %s *)body);' % (footer, footer)
+ print ' body += sizeof(%s);' % footer
+ print ' }'
+ if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+ print ' svga_shader_dump((const uint32_t *)body,'
+ print ' (unsigned)(next - body)/sizeof(uint32_t),'
+ print ' FALSE);'
+ print ' body = next;'
+ print ' }'
+ print ' break;'
+ print ' default:'
+ print ' _debug_printf("\\t0x%08x\\n", cmd_id);'
+ print ' break;'
+ print ' }'
+ print r'''
+ while(body + sizeof(uint32_t) <= next) {
+ _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+ body += sizeof(uint32_t);
+ }
+ while(body + sizeof(uint32_t) <= next)
+ _debug_printf("\t\t0x%02x\n", *body++);
+}
+'''
+ print r'''
+void
+svga_dump_commands(const void *commands, uint32_t size)
+{
+ const uint8_t *next = commands;
+ const uint8_t *last = next + size;
+
+ assert(size % sizeof(uint32_t) == 0);
+
+ while(next < last) {
+ const uint32_t cmd_id = *(const uint32_t *)next;
+
+ if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
+ const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+ const uint8_t *body = (const uint8_t *)&header[1];
+
+ next = body + header->size;
+ if(next > last)
+ break;
+
+ svga_dump_command(cmd_id, body, header->size);
+ }
+ else if(cmd_id == SVGA_CMD_FENCE) {
+ _debug_printf("\tSVGA_CMD_FENCE\n");
+ _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+ next += 2*sizeof(uint32_t);
+ }
+ else {
+ _debug_printf("\t0x%08x\n", cmd_id);
+ next += sizeof(uint32_t);
+ }
+ }
+}
+'''
+
+def main():
+ print copyright.strip()
+ print
+ print '/**'
+ print ' * @file'
+ print ' * Dump SVGA commands.'
+ print ' *'
+ print ' * Generated automatically from svga3d_reg.h by svga_dump.py.'
+ print ' */'
+ print
+ print '#include "svga_types.h"'
+ print '#include "svga_shader_dump.h"'
+ print '#include "svga3d_reg.h"'
+ print
+ print '#include "util/u_debug.h"'
+ print '#include "svga_dump.h"'
+ print
+
+ config = parser.config_t(
+ include_paths = ['../../../include', '../include'],
+ compiler = 'gcc',
+ )
+
+ headers = [
+ 'svga_types.h',
+ 'svga3d_reg.h',
+ ]
+
+ decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+ global_ns = declarations.get_global_namespace(decls)
+
+ names = set()
+ for id, header, body, footer in cmds:
+ names.add(header)
+ for struct, count in body:
+ names.add(struct)
+ if footer is not None:
+ names.add(footer)
+
+ for class_ in global_ns.classes(lambda decl: decl.name in names):
+ dump_struct(decls, class_)
+
+ dump_cmds()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
new file mode 100644
index 00000000000..9217af2dd99
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -0,0 +1,220 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Definitions
+ *
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_H
+#define ST_SHADER_SVGA_H
+
+#include "pipe/p_compiler.h"
+
+struct sh_op
+{
+ unsigned opcode:16;
+ unsigned control:8;
+ unsigned length:4;
+ unsigned predicated:1;
+ unsigned unused:1;
+ unsigned coissue:1;
+ unsigned is_reg:1;
+};
+
+struct sh_reg
+{
+ unsigned number:11;
+ unsigned type_hi:2;
+ unsigned relative:1;
+ unsigned unused:14;
+ unsigned type_lo:3;
+ unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_reg_type( struct sh_reg reg )
+{
+ return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_cdata
+{
+ float xyzw[4];
+};
+
+struct sh_def
+{
+ struct sh_op op;
+ struct sh_reg reg;
+ struct sh_cdata cdata;
+};
+
+struct sh_defb
+{
+ struct sh_op op;
+ struct sh_reg reg;
+ uint data;
+};
+
+struct sh_idata
+{
+ int xyzw[4];
+};
+
+struct sh_defi
+{
+ struct sh_op op;
+ struct sh_reg reg;
+ struct sh_idata idata;
+};
+
+#define PS_TEXTURETYPE_UNKNOWN SVGA3DSAMP_UNKNOWN
+#define PS_TEXTURETYPE_2D SVGA3DSAMP_2D
+#define PS_TEXTURETYPE_CUBE SVGA3DSAMP_CUBE
+#define PS_TEXTURETYPE_VOLUME SVGA3DSAMP_VOLUME
+
+struct ps_sampleinfo
+{
+ unsigned unused:27;
+ unsigned texture_type:4;
+ unsigned is_reg:1;
+};
+
+struct vs_semantic
+{
+ unsigned usage:5;
+ unsigned unused1:11;
+ unsigned usage_index:4;
+ unsigned unused2:12;
+};
+
+struct sh_dstreg
+{
+ unsigned number:11;
+ unsigned type_hi:2;
+ unsigned relative:1;
+ unsigned unused:2;
+ unsigned write_mask:4;
+ unsigned modifier:4;
+ unsigned shift_scale:4;
+ unsigned type_lo:3;
+ unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_dstreg_type( struct sh_dstreg reg )
+{
+ return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_dcl
+{
+ struct sh_op op;
+ union {
+ struct {
+ struct ps_sampleinfo sampleinfo;
+ } ps;
+ struct {
+ struct vs_semantic semantic;
+ } vs;
+ } u;
+ struct sh_dstreg reg;
+};
+
+
+struct sh_srcreg
+{
+ unsigned number:11;
+ unsigned type_hi:2;
+ unsigned relative:1;
+ unsigned unused:2;
+ unsigned swizzle_x:2;
+ unsigned swizzle_y:2;
+ unsigned swizzle_z:2;
+ unsigned swizzle_w:2;
+ unsigned modifier:4;
+ unsigned type_lo:3;
+ unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_srcreg_type( struct sh_srcreg reg )
+{
+ return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_dstop
+{
+ struct sh_op op;
+ struct sh_dstreg dst;
+};
+
+struct sh_srcop
+{
+ struct sh_op op;
+ struct sh_srcreg src;
+};
+
+struct sh_src2op
+{
+ struct sh_op op;
+ struct sh_srcreg src0;
+ struct sh_srcreg src1;
+};
+
+struct sh_unaryop
+{
+ struct sh_op op;
+ struct sh_dstreg dst;
+ struct sh_srcreg src;
+};
+
+struct sh_binaryop
+{
+ struct sh_op op;
+ struct sh_dstreg dst;
+ struct sh_srcreg src0;
+ struct sh_srcreg src1;
+};
+
+struct sh_trinaryop
+{
+ struct sh_op op;
+ struct sh_dstreg dst;
+ struct sh_srcreg src0;
+ struct sh_srcreg src1;
+ struct sh_srcreg src2;
+};
+
+struct sh_comment
+{
+ unsigned opcode:16;
+ unsigned size:16;
+};
+
+#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
new file mode 100644
index 00000000000..70e27d86d30
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -0,0 +1,654 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ *
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "svga_shader.h"
+#include "svga_shader_dump.h"
+#include "svga_shader_op.h"
+#include "util/u_debug.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct dump_info
+{
+ SVGA3dShaderVersion version;
+ boolean is_ps;
+};
+
+static void dump_op( struct sh_op op, const char *mnemonic )
+{
+ assert( op.predicated == 0 );
+ assert( op.is_reg == 0 );
+
+ if (op.coissue)
+ _debug_printf( "+" );
+ _debug_printf( "%s", mnemonic );
+ switch (op.control) {
+ case 0:
+ break;
+ case SVGA3DOPCONT_PROJECT:
+ _debug_printf( "p" );
+ break;
+ case SVGA3DOPCONT_BIAS:
+ _debug_printf( "b" );
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+
+static void dump_comp_op( struct sh_op op, const char *mnemonic )
+{
+ assert( op.is_reg == 0 );
+
+ if (op.coissue)
+ _debug_printf( "+" );
+ _debug_printf( "%s", mnemonic );
+ switch (op.control) {
+ case SVGA3DOPCOMP_RESERVED0:
+ break;
+ case SVGA3DOPCOMP_GT:
+ _debug_printf("_gt");
+ break;
+ case SVGA3DOPCOMP_EQ:
+ _debug_printf("_eq");
+ break;
+ case SVGA3DOPCOMP_GE:
+ _debug_printf("_ge");
+ break;
+ case SVGA3DOPCOMP_LT:
+ _debug_printf("_lt");
+ break;
+ case SVGA3DOPCOMPC_NE:
+ _debug_printf("_ne");
+ break;
+ case SVGA3DOPCOMP_LE:
+ _debug_printf("_le");
+ break;
+ case SVGA3DOPCOMP_RESERVED1:
+ default:
+ assert( 0 );
+ }
+}
+
+
+static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+ assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 );
+ assert( reg.is_reg == 1 );
+
+ switch (sh_reg_type( reg )) {
+ case SVGA3DREG_TEMP:
+ _debug_printf( "r%u", reg.number );
+ break;
+
+ case SVGA3DREG_INPUT:
+ _debug_printf( "v%u", reg.number );
+ break;
+
+ case SVGA3DREG_CONST:
+ if (reg.relative) {
+ if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
+ _debug_printf( "c[aL+%u]", reg.number );
+ else
+ _debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+ }
+ else
+ _debug_printf( "c%u", reg.number );
+ break;
+
+ case SVGA3DREG_ADDR: /* VS */
+ /* SVGA3DREG_TEXTURE */ /* PS */
+ if (di->is_ps)
+ _debug_printf( "t%u", reg.number );
+ else
+ _debug_printf( "a%u", reg.number );
+ break;
+
+ case SVGA3DREG_RASTOUT:
+ switch (reg.number) {
+ case 0 /*POSITION*/:
+ _debug_printf( "oPos" );
+ break;
+ case 1 /*FOG*/:
+ _debug_printf( "oFog" );
+ break;
+ case 2 /*POINT_SIZE*/:
+ _debug_printf( "oPts" );
+ break;
+ default:
+ assert( 0 );
+ _debug_printf( "???" );
+ }
+ break;
+
+ case SVGA3DREG_ATTROUT:
+ assert( reg.number < 2 );
+ _debug_printf( "oD%u", reg.number );
+ break;
+
+ case SVGA3DREG_TEXCRDOUT:
+ /* SVGA3DREG_OUTPUT */
+ _debug_printf( "oT%u", reg.number );
+ break;
+
+ case SVGA3DREG_COLOROUT:
+ _debug_printf( "oC%u", reg.number );
+ break;
+
+ case SVGA3DREG_DEPTHOUT:
+ _debug_printf( "oD%u", reg.number );
+ break;
+
+ case SVGA3DREG_SAMPLER:
+ _debug_printf( "s%u", reg.number );
+ break;
+
+ case SVGA3DREG_CONSTBOOL:
+ assert( !reg.relative );
+ _debug_printf( "b%u", reg.number );
+ break;
+
+ case SVGA3DREG_CONSTINT:
+ assert( !reg.relative );
+ _debug_printf( "i%u", reg.number );
+ break;
+
+ case SVGA3DREG_LOOP:
+ assert( reg.number == 0 );
+ _debug_printf( "aL" );
+ break;
+
+ case SVGA3DREG_MISCTYPE:
+ switch (reg.number) {
+ case SVGA3DMISCREG_POSITION:
+ _debug_printf( "vPos" );
+ break;
+ case SVGA3DMISCREG_FACE:
+ _debug_printf( "vFace" );
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+
+ case SVGA3DREG_LABEL:
+ _debug_printf( "l%u", reg.number );
+ break;
+
+ case SVGA3DREG_PREDICATE:
+ _debug_printf( "p%u", reg.number );
+ break;
+
+
+ default:
+ assert( 0 );
+ _debug_printf( "???" );
+ }
+}
+
+static void dump_cdata( struct sh_cdata cdata )
+{
+ _debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+}
+
+static void dump_idata( struct sh_idata idata )
+{
+ _debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+}
+
+static void dump_bdata( boolean bdata )
+{
+ _debug_printf( bdata ? "TRUE" : "FALSE" );
+}
+
+static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
+{
+ switch (sampleinfo.texture_type) {
+ case SVGA3DSAMP_2D:
+ _debug_printf( "_2d" );
+ break;
+ case SVGA3DSAMP_CUBE:
+ _debug_printf( "_cube" );
+ break;
+ case SVGA3DSAMP_VOLUME:
+ _debug_printf( "_volume" );
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+
+static void dump_usageinfo( struct vs_semantic semantic )
+{
+ switch (semantic.usage) {
+ case SVGA3D_DECLUSAGE_POSITION:
+ _debug_printf("_position" );
+ break;
+ case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+ _debug_printf("_blendweight" );
+ break;
+ case SVGA3D_DECLUSAGE_BLENDINDICES:
+ _debug_printf("_blendindices" );
+ break;
+ case SVGA3D_DECLUSAGE_NORMAL:
+ _debug_printf("_normal" );
+ break;
+ case SVGA3D_DECLUSAGE_PSIZE:
+ _debug_printf("_psize" );
+ break;
+ case SVGA3D_DECLUSAGE_TEXCOORD:
+ _debug_printf("_texcoord");
+ break;
+ case SVGA3D_DECLUSAGE_TANGENT:
+ _debug_printf("_tangent" );
+ break;
+ case SVGA3D_DECLUSAGE_BINORMAL:
+ _debug_printf("_binormal" );
+ break;
+ case SVGA3D_DECLUSAGE_TESSFACTOR:
+ _debug_printf("_tessfactor" );
+ break;
+ case SVGA3D_DECLUSAGE_POSITIONT:
+ _debug_printf("_positiont" );
+ break;
+ case SVGA3D_DECLUSAGE_COLOR:
+ _debug_printf("_color" );
+ break;
+ case SVGA3D_DECLUSAGE_FOG:
+ _debug_printf("_fog" );
+ break;
+ case SVGA3D_DECLUSAGE_DEPTH:
+ _debug_printf("_depth" );
+ break;
+ case SVGA3D_DECLUSAGE_SAMPLE:
+ _debug_printf("_sample");
+ break;
+ default:
+ assert( 0 );
+ return;
+ }
+
+ if (semantic.usage_index != 0) {
+ _debug_printf("%d", semantic.usage_index );
+ }
+}
+
+static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
+{
+ union {
+ struct sh_reg reg;
+ struct sh_dstreg dstreg;
+ } u;
+
+ assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
+
+ if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
+ _debug_printf( "_sat" );
+ if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
+ _debug_printf( "_pp" );
+ switch (dstreg.shift_scale) {
+ case 0:
+ break;
+ case 1:
+ _debug_printf( "_x2" );
+ break;
+ case 2:
+ _debug_printf( "_x4" );
+ break;
+ case 3:
+ _debug_printf( "_x8" );
+ break;
+ case 13:
+ _debug_printf( "_d8" );
+ break;
+ case 14:
+ _debug_printf( "_d4" );
+ break;
+ case 15:
+ _debug_printf( "_d2" );
+ break;
+ default:
+ assert( 0 );
+ }
+ _debug_printf( " " );
+
+ u.dstreg = dstreg;
+ dump_reg( u.reg, NULL, di );
+ if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) {
+ _debug_printf( "." );
+ if (dstreg.write_mask & SVGA3DWRITEMASK_0)
+ _debug_printf( "x" );
+ if (dstreg.write_mask & SVGA3DWRITEMASK_1)
+ _debug_printf( "y" );
+ if (dstreg.write_mask & SVGA3DWRITEMASK_2)
+ _debug_printf( "z" );
+ if (dstreg.write_mask & SVGA3DWRITEMASK_3)
+ _debug_printf( "w" );
+ }
+}
+
+static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+ union {
+ struct sh_reg reg;
+ struct sh_srcreg srcreg;
+ } u;
+
+ switch (srcreg.modifier) {
+ case SVGA3DSRCMOD_NEG:
+ case SVGA3DSRCMOD_BIASNEG:
+ case SVGA3DSRCMOD_SIGNNEG:
+ case SVGA3DSRCMOD_X2NEG:
+ _debug_printf( "-" );
+ break;
+ case SVGA3DSRCMOD_ABS:
+ _debug_printf( "|" );
+ break;
+ case SVGA3DSRCMOD_ABSNEG:
+ _debug_printf( "-|" );
+ break;
+ case SVGA3DSRCMOD_COMP:
+ _debug_printf( "1-" );
+ break;
+ case SVGA3DSRCMOD_NOT:
+ _debug_printf( "!" );
+ }
+
+ u.srcreg = srcreg;
+ dump_reg( u.reg, indreg, di );
+ switch (srcreg.modifier) {
+ case SVGA3DSRCMOD_NONE:
+ case SVGA3DSRCMOD_NEG:
+ case SVGA3DSRCMOD_COMP:
+ case SVGA3DSRCMOD_NOT:
+ break;
+ case SVGA3DSRCMOD_ABS:
+ case SVGA3DSRCMOD_ABSNEG:
+ _debug_printf( "|" );
+ break;
+ case SVGA3DSRCMOD_BIAS:
+ case SVGA3DSRCMOD_BIASNEG:
+ _debug_printf( "_bias" );
+ break;
+ case SVGA3DSRCMOD_SIGN:
+ case SVGA3DSRCMOD_SIGNNEG:
+ _debug_printf( "_bx2" );
+ break;
+ case SVGA3DSRCMOD_X2:
+ case SVGA3DSRCMOD_X2NEG:
+ _debug_printf( "_x2" );
+ break;
+ case SVGA3DSRCMOD_DZ:
+ _debug_printf( "_dz" );
+ break;
+ case SVGA3DSRCMOD_DW:
+ _debug_printf( "_dw" );
+ break;
+ default:
+ assert( 0 );
+ }
+ if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) {
+ _debug_printf( "." );
+ if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
+ _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+ }
+ else {
+ _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+ _debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
+ _debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
+ _debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
+ }
+ }
+}
+
+void
+svga_shader_dump(
+ const unsigned *assem,
+ unsigned dwords,
+ unsigned do_binary )
+{
+ const unsigned *start = assem;
+ boolean finished = FALSE;
+ struct dump_info di;
+ unsigned i;
+
+ if (do_binary) {
+ for (i = 0; i < dwords; i++)
+ _debug_printf(" 0x%08x,\n", assem[i]);
+
+ _debug_printf("\n\n");
+ }
+
+ di.version.value = *assem++;
+ di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
+
+ _debug_printf(
+ "%s_%u_%u\n",
+ di.is_ps ? "ps" : "vs",
+ di.version.major,
+ di.version.minor );
+
+ while (!finished) {
+ struct sh_op op = *(struct sh_op *) assem;
+
+ if (assem - start >= dwords) {
+ _debug_printf("... ran off end of buffer\n");
+ assert(0);
+ return;
+ }
+
+ switch (op.opcode) {
+ case SVGA3DOP_DCL:
+ {
+ struct sh_dcl dcl = *(struct sh_dcl *) assem;
+
+ _debug_printf( "dcl" );
+ if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
+ dump_sampleinfo( dcl.u.ps.sampleinfo );
+ else if (di.is_ps) {
+ if (di.version.major == 3 &&
+ sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
+ dump_usageinfo( dcl.u.vs.semantic );
+ }
+ else
+ dump_usageinfo( dcl.u.vs.semantic );
+ dump_dstreg( dcl.reg, &di );
+ _debug_printf( "\n" );
+ assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
+ }
+ break;
+
+ case SVGA3DOP_DEFB:
+ {
+ struct sh_defb defb = *(struct sh_defb *) assem;
+
+ _debug_printf( "defb " );
+ dump_reg( defb.reg, NULL, &di );
+ _debug_printf( ", " );
+ dump_bdata( defb.data );
+ _debug_printf( "\n" );
+ assem += sizeof( struct sh_defb ) / sizeof( unsigned );
+ }
+ break;
+
+ case SVGA3DOP_DEFI:
+ {
+ struct sh_defi defi = *(struct sh_defi *) assem;
+
+ _debug_printf( "defi " );
+ dump_reg( defi.reg, NULL, &di );
+ _debug_printf( ", " );
+ dump_idata( defi.idata );
+ _debug_printf( "\n" );
+ assem += sizeof( struct sh_defi ) / sizeof( unsigned );
+ }
+ break;
+
+ case SVGA3DOP_TEXCOORD:
+ assert( di.is_ps );
+ dump_op( op, "texcoord" );
+ if (0) {
+ struct sh_dstop dstop = *(struct sh_dstop *) assem;
+ dump_dstreg( dstop.dst, &di );
+ assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+ }
+ else {
+ struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+ dump_dstreg( unaryop.dst, &di );
+ _debug_printf( ", " );
+ dump_srcreg( unaryop.src, NULL, &di );
+ assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+ }
+ _debug_printf( "\n" );
+ break;
+
+ case SVGA3DOP_TEX:
+ assert( di.is_ps );
+ if (0) {
+ dump_op( op, "tex" );
+ if (0) {
+ struct sh_dstop dstop = *(struct sh_dstop *) assem;
+
+ dump_dstreg( dstop.dst, &di );
+ assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+ }
+ else {
+ struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+
+ dump_dstreg( unaryop.dst, &di );
+ _debug_printf( ", " );
+ dump_srcreg( unaryop.src, NULL, &di );
+ assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+ }
+ }
+ else {
+ struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
+
+ dump_op( op, "texld" );
+ dump_dstreg( binaryop.dst, &di );
+ _debug_printf( ", " );
+ dump_srcreg( binaryop.src0, NULL, &di );
+ _debug_printf( ", " );
+ dump_srcreg( binaryop.src1, NULL, &di );
+ assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
+ }
+ _debug_printf( "\n" );
+ break;
+
+ case SVGA3DOP_DEF:
+ {
+ struct sh_def def = *(struct sh_def *) assem;
+
+ _debug_printf( "def " );
+ dump_reg( def.reg, NULL, &di );
+ _debug_printf( ", " );
+ dump_cdata( def.cdata );
+ _debug_printf( "\n" );
+ assem += sizeof( struct sh_def ) / sizeof( unsigned );
+ }
+ break;
+
+ case SVGA3DOP_PHASE:
+ _debug_printf( "phase\n" );
+ assem += sizeof( struct sh_op ) / sizeof( unsigned );
+ break;
+
+ case SVGA3DOP_COMMENT:
+ {
+ struct sh_comment comment = *(struct sh_comment *)assem;
+
+ /* Ignore comment contents. */
+ assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size;
+ }
+ break;
+
+ case SVGA3DOP_RET:
+ _debug_printf( "ret\n" );
+ assem += sizeof( struct sh_op ) / sizeof( unsigned );
+ break;
+
+ case SVGA3DOP_END:
+ _debug_printf( "end\n" );
+ finished = TRUE;
+ break;
+
+ default:
+ {
+ const struct sh_opcode_info *info = svga_opcode_info( op.opcode );
+ uint i;
+ uint num_src = info->num_src + op.predicated;
+ boolean not_first_arg = FALSE;
+
+ assert( info->num_dst <= 1 );
+
+ if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
+ num_src += 2;
+
+ dump_comp_op( op, info->mnemonic );
+ assem += sizeof( struct sh_op ) / sizeof( unsigned );
+
+ if (info->num_dst > 0) {
+ struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
+
+ dump_dstreg( dstreg, &di );
+ assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
+ not_first_arg = TRUE;
+ }
+
+ for (i = 0; i < num_src; i++) {
+ struct sh_srcreg srcreg;
+ struct sh_srcreg indreg;
+
+ srcreg = *(struct sh_srcreg *) assem;
+ assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+ if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
+ indreg = *(struct sh_srcreg *) assem;
+ assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+ }
+
+ if (not_first_arg)
+ _debug_printf( ", " );
+ else
+ _debug_printf( " " );
+ dump_srcreg( srcreg, &indreg, &di );
+ not_first_arg = TRUE;
+ }
+
+ _debug_printf( "\n" );
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
new file mode 100644
index 00000000000..a2657acb2f1
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ *
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef SVGA_SHADER_DUMP_H
+#define SVGA_SHADER_DUMP_H
+
+void
+svga_shader_dump(
+ const unsigned *assem,
+ unsigned dwords,
+ unsigned do_binary );
+
+#endif /* SVGA_SHADER_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
new file mode 100644
index 00000000000..8343bfdaab4
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -0,0 +1,168 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ *
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "util/u_debug.h"
+#include "svga_shader_op.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+#define SVGA3DOP_INVALID SVGA3DOP_END
+#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
+
+static struct sh_opcode_info opcode_info[] =
+{
+ { "nop", 0, 0, SVGA3DOP_NOP },
+ { "mov", 1, 1, SVGA3DOP_MOV, },
+ { "add", 1, 2, SVGA3DOP_ADD, },
+ { "sub", 1, 2, SVGA3DOP_SUB, },
+ { "mad", 1, 3, SVGA3DOP_MAD, },
+ { "mul", 1, 2, SVGA3DOP_MUL, },
+ { "rcp", 1, 1, SVGA3DOP_RCP, },
+ { "rsq", 1, 1, SVGA3DOP_RSQ, },
+ { "dp3", 1, 2, SVGA3DOP_DP3, },
+ { "dp4", 1, 2, SVGA3DOP_DP4, },
+ { "min", 1, 2, SVGA3DOP_MIN, },
+ { "max", 1, 2, SVGA3DOP_MAX, },
+ { "slt", 1, 2, SVGA3DOP_SLT, },
+ { "sge", 1, 2, SVGA3DOP_SGE, },
+ { "exp", 1, 1, SVGA3DOP_EXP, },
+ { "log", 1, 1, SVGA3DOP_LOG, },
+ { "lit", 1, 1, SVGA3DOP_LIT, },
+ { "dst", 1, 2, SVGA3DOP_DST, },
+ { "lrp", 1, 3, SVGA3DOP_LRP, },
+ { "frc", 1, 1, SVGA3DOP_FRC, },
+ { "m4x4", 1, 2, SVGA3DOP_M4x4, },
+ { "m4x3", 1, 2, SVGA3DOP_M4x3, },
+ { "m3x4", 1, 2, SVGA3DOP_M3x4, },
+ { "m3x3", 1, 2, SVGA3DOP_M3x3, },
+ { "m3x2", 1, 2, SVGA3DOP_M3x2, },
+ { "call", 0, 1, SVGA3DOP_CALL, },
+ { "callnz", 0, 2, SVGA3DOP_CALLNZ, },
+ { "loop", 0, 2, SVGA3DOP_LOOP, },
+ { "ret", 0, 0, SVGA3DOP_RET, },
+ { "endloop", 0, 0, SVGA3DOP_ENDLOOP, },
+ { "label", 0, 1, SVGA3DOP_LABEL, },
+ { "dcl", 0, 0, SVGA3DOP_DCL, },
+ { "pow", 1, 2, SVGA3DOP_POW, },
+ { "crs", 1, 2, SVGA3DOP_CRS, },
+ { "sgn", 1, 3, SVGA3DOP_SGN, },
+ { "abs", 1, 1, SVGA3DOP_ABS, },
+ { "nrm", 1, 1, SVGA3DOP_NRM, }, /* 3-componenet normalization */
+ { "sincos", 1, 1, SVGA3DOP_SINCOS, },
+ { "rep", 0, 1, SVGA3DOP_REP, },
+ { "endrep", 0, 0, SVGA3DOP_ENDREP, },
+ { "if", 0, 1, SVGA3DOP_IF, },
+ { "ifc", 0, 2, SVGA3DOP_IFC, },
+ { "else", 0, 0, SVGA3DOP_ELSE, },
+ { "endif", 0, 0, SVGA3DOP_ENDIF, },
+ { "break", 0, 0, SVGA3DOP_BREAK, },
+ { "breakc", 0, 0, SVGA3DOP_BREAKC, },
+ { "mova", 1, 1, SVGA3DOP_MOVA, },
+ { "defb", 0, 0, SVGA3DOP_DEFB, },
+ { "defi", 0, 0, SVGA3DOP_DEFI, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "???", 0, 0, SVGA3DOP_INVALID, },
+ { "texcoord", 0, 0, SVGA3DOP_TEXCOORD, },
+ { "texkill", 1, 0, SVGA3DOP_TEXKILL, },
+ { "tex", 0, 0, SVGA3DOP_TEX, },
+ { "texbem", 1, 1, SVGA3DOP_TEXBEM, },
+ { "texbeml", 1, 1, SVGA3DOP_TEXBEML, },
+ { "texreg2ar", 1, 1, SVGA3DOP_TEXREG2AR, },
+ { "texreg2gb", 1, 1, SVGA3DOP_TEXREG2GB, },
+ { "texm3x2pad", 1, 1, SVGA3DOP_TEXM3x2PAD, },
+ { "texm3x2tex", 1, 1, SVGA3DOP_TEXM3x2TEX, },
+ { "texm3x3pad", 1, 1, SVGA3DOP_TEXM3x3PAD, },
+ { "texm3x3tex", 1, 1, SVGA3DOP_TEXM3x3TEX, },
+ { "reserved0", 0, 0, SVGA3DOP_RESERVED0, },
+ { "texm3x3spec", 1, 2, SVGA3DOP_TEXM3x3SPEC, },
+ { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
+ { "expp", 1, 1, SVGA3DOP_EXPP, },
+ { "logp", 1, 1, SVGA3DOP_LOGP, },
+ { "cnd", 1, 3, SVGA3DOP_CND, },
+ { "def", 0, 0, SVGA3DOP_DEF, },
+ { "texreg2rgb", 1, 1, SVGA3DOP_TEXREG2RGB, },
+ { "texdp3tex", 1, 1, SVGA3DOP_TEXDP3TEX, },
+ { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
+ { "texdp3", 1, 1, SVGA3DOP_TEXDP3, },
+ { "texm3x3", 1, 1, SVGA3DOP_TEXM3x3, },
+ { "texdepth", 1, 0, SVGA3DOP_TEXDEPTH, },
+ { "cmp", 1, 3, SVGA3DOP_CMP, },
+ { "bem", 1, 2, SVGA3DOP_BEM, },
+ { "dp2add", 1, 3, SVGA3DOP_DP2ADD, },
+ { "dsx", 1, 1, SVGA3DOP_INVALID, },
+ { "dsy", 1, 1, SVGA3DOP_INVALID, },
+ { "texldd", 1, 1, SVGA3DOP_INVALID, },
+ { "setp", 1, 2, SVGA3DOP_SETP, },
+ { "texldl", 1, 1, SVGA3DOP_INVALID, },
+ { "breakp", 1, 1, SVGA3DOP_INVALID, },
+};
+
+const struct sh_opcode_info *svga_opcode_info( uint op )
+{
+ struct sh_opcode_info *info;
+
+ if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) {
+ /* The opcode is either PHASE, COMMENT, END or out of range.
+ */
+ assert( 0 );
+ return NULL;
+ }
+
+ info = &opcode_info[op];
+
+ if (info->svga_opcode == SVGA3DOP_INVALID) {
+ /* No valid information. Please provide number of dst/src registers.
+ */
+ assert( 0 );
+ return NULL;
+ }
+
+ /* Sanity check.
+ */
+ assert( op == info->svga_opcode );
+
+ return info;
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
new file mode 100644
index 00000000000..e558de02c53
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ *
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef SVGA_SHADER_OP_H
+#define SVGA_SHADER_OP_H
+
+struct sh_opcode_info
+{
+ const char *mnemonic;
+ unsigned num_dst:8;
+ unsigned num_src:8;
+ unsigned svga_opcode:16;
+};
+
+const struct sh_opcode_info *svga_opcode_info( unsigned op );
+
+#endif /* SVGA_SHADER_OP_H */
diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
index 1000c31e49a..203c3851bc3 100644
--- a/src/gallium/drivers/trace/README
+++ b/src/gallium/drivers/trace/README
@@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing
ldd progs/trivial/tri
-== Traceing ==
+== Tracing ==
-For traceing then do
+For tracing then do
- export XMESA_TRACE=y
GALLIUM_TRACE=tri.trace progs/trivial/tri
which should create a tri.trace file, which is an XML file. You can view copying
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index bf470b46ae1..075e4f9a0b2 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx,
assert(tr_buf->buffer);
assert(tr_buf->buffer->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_buf->buffer;
}
@@ -90,30 +91,12 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
assert(tr_surf->surface);
assert(tr_surf->surface->texture->screen == tr_scr->screen);
+ (void) tr_scr;
return tr_surf->surface;
}
static INLINE void
-trace_context_set_edgeflags(struct pipe_context *_pipe,
- const unsigned *bitfield)
-{
- struct trace_context *tr_ctx = trace_context(_pipe);
- struct pipe_context *pipe = tr_ctx->pipe;
-
- trace_dump_call_begin("pipe_context", "set_edgeflags");
-
- trace_dump_arg(ptr, pipe);
- /* FIXME: we don't know how big this array is */
- trace_dump_arg(ptr, bitfield);
-
- pipe->set_edgeflags(pipe, bitfield);;
-
- trace_dump_call_end();
-}
-
-
-static INLINE void
trace_context_draw_block(struct trace_context *tr_ctx, int flag)
{
int k;
@@ -143,10 +126,16 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
for (k = 0; k < tr_ctx->curr.nr_cbufs; k++)
if (tr_ctx->draw_rule.surf == tr_ctx->curr.cbufs[k])
block = TRUE;
- if (tr_ctx->draw_rule.tex)
+ if (tr_ctx->draw_rule.tex) {
for (k = 0; k < tr_ctx->curr.num_texs; k++)
if (tr_ctx->draw_rule.tex == tr_ctx->curr.tex[k])
block = TRUE;
+ for (k = 0; k < tr_ctx->curr.num_vert_texs; k++) {
+ if (tr_ctx->draw_rule.tex == tr_ctx->curr.vert_tex[k]) {
+ block = TRUE;
+ }
+ }
+ }
if (block)
tr_ctx->draw_blocked |= (flag | 4);
@@ -172,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
pipe_mutex_unlock(tr_ctx->draw_mutex);
}
-static INLINE boolean
+static INLINE void
trace_context_draw_arrays(struct pipe_context *_pipe,
unsigned mode, unsigned start, unsigned count)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -192,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_arrays(pipe, mode, start, count);;
-
- trace_dump_ret(bool, result);
+ pipe->draw_arrays(pipe, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -214,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -232,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);;
-
- trace_dump_ret(bool, result);
+ pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
-static INLINE boolean
+static INLINE void
trace_context_draw_range_elements(struct pipe_context *_pipe,
struct pipe_buffer *_indexBuffer,
unsigned indexSize,
@@ -258,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
struct pipe_context *pipe = tr_ctx->pipe;
struct pipe_buffer *indexBuffer = tr_buf->buffer;
- boolean result;
if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
- return 0;
+ return;
trace_context_draw_block(tr_ctx, 1);
@@ -278,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
trace_dump_arg(uint, start);
trace_dump_arg(uint, count);
- result = pipe->draw_range_elements(pipe,
- indexBuffer,
- indexSize, minIndex, maxIndex,
- mode, start, count);
-
- trace_dump_ret(bool, result);
+ pipe->draw_range_elements(pipe,
+ indexBuffer,
+ indexSize, minIndex, maxIndex,
+ mode, start, count);
trace_dump_call_end();
trace_context_draw_block(tr_ctx, 2);
-
- return result;
}
@@ -306,7 +280,7 @@ trace_context_create_query(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(uint, query_type);
- result = pipe->create_query(pipe, query_type);;
+ result = pipe->create_query(pipe, query_type);
trace_dump_ret(ptr, result);
@@ -328,7 +302,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, query);
- pipe->destroy_query(pipe, query);;
+ pipe->destroy_query(pipe, query);
trace_dump_call_end();
}
@@ -346,7 +320,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, query);
- pipe->begin_query(pipe, query);;
+ pipe->begin_query(pipe, query);
trace_dump_call_end();
}
@@ -385,7 +359,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
- _result = pipe->get_query_result(pipe, query, wait, presult);;
+ _result = pipe->get_query_result(pipe, query, wait, presult);
result = *presult;
trace_dump_arg(uint, result);
@@ -410,7 +384,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(blend_state, state);
- result = pipe->create_blend_state(pipe, state);;
+ result = pipe->create_blend_state(pipe, state);
trace_dump_ret(ptr, result);
@@ -432,7 +406,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->bind_blend_state(pipe, state);;
+ pipe->bind_blend_state(pipe, state);
trace_dump_call_end();
}
@@ -450,7 +424,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->delete_blend_state(pipe, state);;
+ pipe->delete_blend_state(pipe, state);
trace_dump_call_end();
}
@@ -469,7 +443,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(sampler_state, state);
- result = pipe->create_sampler_state(pipe, state);;
+ result = pipe->create_sampler_state(pipe, state);
trace_dump_ret(ptr, result);
@@ -480,19 +454,40 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
static INLINE void
-trace_context_bind_sampler_states(struct pipe_context *_pipe,
- unsigned num_states, void **states)
+trace_context_bind_fragment_sampler_states(struct pipe_context *_pipe,
+ unsigned num_states,
+ void **states)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- trace_dump_call_begin("pipe_context", "bind_sampler_states");
+ trace_dump_call_begin("pipe_context", "bind_fragment_sampler_states");
trace_dump_arg(ptr, pipe);
trace_dump_arg(uint, num_states);
trace_dump_arg_array(ptr, states, num_states);
- pipe->bind_sampler_states(pipe, num_states, states);;
+ pipe->bind_fragment_sampler_states(pipe, num_states, states);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe,
+ unsigned num_states,
+ void **states)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct pipe_context *pipe = tr_ctx->pipe;
+
+ trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_states);
+ trace_dump_arg_array(ptr, states, num_states);
+
+ pipe->bind_vertex_sampler_states(pipe, num_states, states);
trace_dump_call_end();
}
@@ -510,7 +505,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->delete_sampler_state(pipe, state);;
+ pipe->delete_sampler_state(pipe, state);
trace_dump_call_end();
}
@@ -529,7 +524,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(rasterizer_state, state);
- result = pipe->create_rasterizer_state(pipe, state);;
+ result = pipe->create_rasterizer_state(pipe, state);
trace_dump_ret(ptr, result);
@@ -551,7 +546,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->bind_rasterizer_state(pipe, state);;
+ pipe->bind_rasterizer_state(pipe, state);
trace_dump_call_end();
}
@@ -569,7 +564,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->delete_rasterizer_state(pipe, state);;
+ pipe->delete_rasterizer_state(pipe, state);
trace_dump_call_end();
}
@@ -585,7 +580,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state");
- result = pipe->create_depth_stencil_alpha_state(pipe, state);;
+ result = pipe->create_depth_stencil_alpha_state(pipe, state);
trace_dump_arg(ptr, pipe);
trace_dump_arg(depth_stencil_alpha_state, state);
@@ -610,7 +605,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->bind_depth_stencil_alpha_state(pipe, state);;
+ pipe->bind_depth_stencil_alpha_state(pipe, state);
trace_dump_call_end();
}
@@ -628,7 +623,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->delete_depth_stencil_alpha_state(pipe, state);;
+ pipe->delete_depth_stencil_alpha_state(pipe, state);
trace_dump_call_end();
}
@@ -647,7 +642,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(shader_state, state);
- result = pipe->create_fs_state(pipe, state);;
+ result = pipe->create_fs_state(pipe, state);
trace_dump_ret(ptr, result);
@@ -750,7 +745,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe,
if (tr_shdr && tr_shdr->replaced)
state = tr_shdr->replaced;
- pipe->bind_vs_state(pipe, state);;
+ pipe->bind_vs_state(pipe, state);
trace_dump_call_end();
}
@@ -770,7 +765,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(ptr, state);
- pipe->delete_vs_state(pipe, state);;
+ pipe->delete_vs_state(pipe, state);
trace_dump_call_end();
@@ -790,7 +785,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(blend_color, state);
- pipe->set_blend_color(pipe, state);;
+ pipe->set_blend_color(pipe, state);
trace_dump_call_end();
}
@@ -808,7 +803,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(clip_state, state);
- pipe->set_clip_state(pipe, state);;
+ pipe->set_clip_state(pipe, state);
trace_dump_call_end();
}
@@ -880,7 +875,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(framebuffer_state, state);
- pipe->set_framebuffer_state(pipe, state);;
+ pipe->set_framebuffer_state(pipe, state);
trace_dump_call_end();
}
@@ -898,7 +893,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(poly_stipple, state);
- pipe->set_polygon_stipple(pipe, state);;
+ pipe->set_polygon_stipple(pipe, state);
trace_dump_call_end();
}
@@ -916,7 +911,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(scissor_state, state);
- pipe->set_scissor_state(pipe, state);;
+ pipe->set_scissor_state(pipe, state);
trace_dump_call_end();
}
@@ -934,16 +929,16 @@ trace_context_set_viewport_state(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(viewport_state, state);
- pipe->set_viewport_state(pipe, state);;
+ pipe->set_viewport_state(pipe, state);
trace_dump_call_end();
}
static INLINE void
-trace_context_set_sampler_textures(struct pipe_context *_pipe,
- unsigned num_textures,
- struct pipe_texture **textures)
+trace_context_set_fragment_sampler_textures(struct pipe_context *_pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct trace_texture *tr_tex;
@@ -959,13 +954,44 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe,
}
textures = unwrapped_textures;
- trace_dump_call_begin("pipe_context", "set_sampler_textures");
+ trace_dump_call_begin("pipe_context", "set_fragment_sampler_textures");
+
+ trace_dump_arg(ptr, pipe);
+ trace_dump_arg(uint, num_textures);
+ trace_dump_arg_array(ptr, textures, num_textures);
+
+ pipe->set_fragment_sampler_textures(pipe, num_textures, textures);
+
+ trace_dump_call_end();
+}
+
+
+static INLINE void
+trace_context_set_vertex_sampler_textures(struct pipe_context *_pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct trace_context *tr_ctx = trace_context(_pipe);
+ struct trace_texture *tr_tex;
+ struct pipe_context *pipe = tr_ctx->pipe;
+ struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned i;
+
+ tr_ctx->curr.num_vert_texs = num_textures;
+ for(i = 0; i < num_textures; ++i) {
+ tr_tex = trace_texture(textures[i]);
+ tr_ctx->curr.vert_tex[i] = tr_tex;
+ unwrapped_textures[i] = tr_tex ? tr_tex->texture : NULL;
+ }
+ textures = unwrapped_textures;
+
+ trace_dump_call_begin("pipe_context", "set_vertex_sampler_textures");
trace_dump_arg(ptr, pipe);
trace_dump_arg(uint, num_textures);
trace_dump_arg_array(ptr, textures, num_textures);
- pipe->set_sampler_textures(pipe, num_textures, textures);;
+ pipe->set_vertex_sampler_textures(pipe, num_textures, textures);
trace_dump_call_end();
}
@@ -1024,7 +1050,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe,
trace_dump_struct_array(vertex_element, elements, num_elements);
trace_dump_arg_end();
- pipe->set_vertex_elements(pipe, num_elements, elements);;
+ pipe->set_vertex_elements(pipe, num_elements, elements);
trace_dump_call_end();
}
@@ -1085,7 +1111,7 @@ trace_context_surface_fill(struct pipe_context *_pipe,
trace_dump_arg(uint, width);
trace_dump_arg(uint, height);
- pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);;
+ pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);
trace_dump_call_end();
}
@@ -1128,7 +1154,7 @@ trace_context_flush(struct pipe_context *_pipe,
trace_dump_arg(ptr, pipe);
trace_dump_arg(uint, flags);
- pipe->flush(pipe, flags, fence);;
+ pipe->flush(pipe, flags, fence);
if(fence)
trace_dump_ret(ptr, *fence);
@@ -1240,7 +1266,6 @@ trace_context_create(struct pipe_screen *_screen,
tr_ctx->base.winsys = _screen->winsys;
tr_ctx->base.screen = _screen;
tr_ctx->base.destroy = trace_context_destroy;
- tr_ctx->base.set_edgeflags = trace_context_set_edgeflags;
tr_ctx->base.draw_arrays = trace_context_draw_arrays;
tr_ctx->base.draw_elements = trace_context_draw_elements;
tr_ctx->base.draw_range_elements = trace_context_draw_range_elements;
@@ -1253,7 +1278,8 @@ trace_context_create(struct pipe_screen *_screen,
tr_ctx->base.bind_blend_state = trace_context_bind_blend_state;
tr_ctx->base.delete_blend_state = trace_context_delete_blend_state;
tr_ctx->base.create_sampler_state = trace_context_create_sampler_state;
- tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states;
+ tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states;
+ tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states;
tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state;
tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state;
tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state;
@@ -1274,7 +1300,8 @@ trace_context_create(struct pipe_screen *_screen,
tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple;
tr_ctx->base.set_scissor_state = trace_context_set_scissor_state;
tr_ctx->base.set_viewport_state = trace_context_set_viewport_state;
- tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures;
+ tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures;
+ tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures;
tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
if (pipe->surface_copy)
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
index 6febe4b4114..852b480765a 100644
--- a/src/gallium/drivers/trace/tr_context.h
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -54,6 +54,9 @@ struct trace_context
struct trace_texture *tex[PIPE_MAX_SAMPLERS];
unsigned num_texs;
+ struct trace_texture *vert_tex[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_vert_texs;
+
unsigned nr_cbufs;
struct trace_texture *cbufs[PIPE_MAX_COLOR_BUFS];
struct trace_texture *zsbuf;
diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c
index 781ca5d3bc0..48d1c4051cc 100644
--- a/src/gallium/drivers/trace/tr_drm.c
+++ b/src/gallium/drivers/trace/tr_drm.c
@@ -150,7 +150,9 @@ trace_drm_destroy(struct drm_api *_api)
{
struct trace_drm_api *tr_api = trace_drm_api(_api);
struct drm_api *api = tr_api->api;
- api->destroy(api);
+
+ if (api->destroy)
+ api->destroy(api);
free(tr_api);
}
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 7e2ccbcfdc5..0f45e211a32 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -40,7 +40,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include <stdlib.h>
#endif
@@ -258,7 +258,7 @@ boolean trace_dump_trace_begin()
trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n");
trace_dump_writes("<trace version='0.1'>\n");
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
/* Linux applications rarely cleanup GL / Gallium resources so catch
* application exit here */
atexit(trace_dump_trace_close);
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index bcf6751af4f..86237e03bcc 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -43,19 +43,6 @@ void trace_dump_format(enum pipe_format format)
}
-void trace_dump_block(const struct pipe_format_block *block)
-{
- if (!trace_dumping_enabled_locked())
- return;
-
- trace_dump_struct_begin("pipe_format_block");
- trace_dump_member(uint, block, size);
- trace_dump_member(uint, block, width);
- trace_dump_member(uint, block, height);
- trace_dump_struct_end();
-}
-
-
static void trace_dump_reference(const struct pipe_reference *reference)
{
if (!trace_dumping_enabled_locked())
@@ -83,19 +70,15 @@ void trace_dump_template(const struct pipe_texture *templat)
trace_dump_member(format, templat, format);
trace_dump_member_begin("width");
- trace_dump_array(uint, templat->width, 1);
+ trace_dump_uint(templat->width0);
trace_dump_member_end();
trace_dump_member_begin("height");
- trace_dump_array(uint, templat->height, 1);
+ trace_dump_uint(templat->height0);
trace_dump_member_end();
trace_dump_member_begin("depth");
- trace_dump_array(uint, templat->depth, 1);
- trace_dump_member_end();
-
- trace_dump_member_begin("block");
- trace_dump_block(&templat->block);
+ trace_dump_uint(templat->depth0);
trace_dump_member_end();
trace_dump_member(uint, templat, last_level);
@@ -426,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state)
trace_dump_member(uint, state, min_img_filter);
trace_dump_member(uint, state, min_mip_filter);
trace_dump_member(uint, state, mag_img_filter);
- trace_dump_member(bool, state, compare_mode);
+ trace_dump_member(uint, state, compare_mode);
trace_dump_member(uint, state, compare_func);
trace_dump_member(bool, state, normalized_coords);
trace_dump_member(uint, state, prefilter);
@@ -483,16 +466,9 @@ void trace_dump_transfer(const struct pipe_transfer *state)
trace_dump_struct_begin("pipe_transfer");
- trace_dump_member(format, state, format);
trace_dump_member(uint, state, width);
trace_dump_member(uint, state, height);
- trace_dump_member_begin("block");
- trace_dump_block(&state->block);
- trace_dump_member_end();
-
- trace_dump_member(uint, state, nblocksx);
- trace_dump_member(uint, state, nblocksy);
trace_dump_member(uint, state, stride);
trace_dump_member(uint, state, usage);
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 05b821adb64..07ad6fbb205 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -35,11 +35,8 @@
void trace_dump_format(enum pipe_format format);
-void trace_dump_block(const struct pipe_format_block *block);
-
void trace_dump_template(const struct pipe_texture *templat);
-
void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state);
void trace_dump_poly_stipple(const struct pipe_poly_stipple *state);
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index 81e0a6f3b00..0546aad9b50 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -26,6 +26,7 @@
**************************************************************************/
+#include "util/u_format.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_simple_list.h"
@@ -44,7 +45,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# define sleep Sleep
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE)
void usleep(int);
# define sleep usleep
#else
@@ -179,7 +180,7 @@ static int
trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial)
{
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header;
struct tr_list *ptr;
struct pipe_texture *t;
@@ -200,10 +201,12 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header,
t = tr_tex->texture;
rbug_send_texture_info_reply(tr_rbug->con, serial,
t->target, t->format,
- t->width, t->last_level + 1,
- t->height, t->last_level + 1,
- t->depth, t->last_level + 1,
- t->block.width, t->block.height, t->block.size,
+ &t->width0, 1,
+ &t->height0, 1,
+ &t->depth0, 1,
+ util_format_get_blockwidth(t->format),
+ util_format_get_blockheight(t->format),
+ util_format_get_blocksize(t->format),
t->last_level,
t->nr_samples,
t->tex_usage,
@@ -220,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header;
struct trace_screen *tr_scr = tr_rbug->tr_scr;
- struct trace_texture *tr_tex;
+ struct trace_texture *tr_tex = NULL;
struct tr_list *ptr;
struct pipe_screen *screen = tr_scr->screen;
@@ -251,9 +254,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
map = screen->transfer_map(screen, t);
rbug_send_texture_read_reply(tr_rbug->con, serial,
- t->format,
- t->block.width, t->block.height, t->block.size,
- (uint8_t*)map, t->stride * t->nblocksy,
+ t->texture->format,
+ util_format_get_blockwidth(t->texture->format),
+ util_format_get_blockheight(t->texture->format),
+ util_format_get_blocksize(t->texture->format),
+ (uint8_t*)map,
+ t->stride * util_format_get_nblocksy(t->texture->format, t->height),
t->stride,
NULL);
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ab605c7fc87..117503aaff6 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_simple_list.h"
@@ -35,6 +36,7 @@
#include "tr_screen.h"
#include "pipe/p_inlines.h"
+#include "pipe/p_format.h"
static boolean trace = FALSE;
@@ -366,7 +368,8 @@ trace_screen_get_tex_transfer(struct pipe_screen *_screen,
trace_dump_call_end();
- result = trace_transfer_create(tr_tex, result);
+ if (result)
+ result = trace_transfer_create(tr_tex, result);
return result;
}
@@ -423,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
struct pipe_transfer *transfer = tr_trans->transfer;
if(tr_trans->map) {
- size_t size = transfer->nblocksy * transfer->stride;
+ size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
trace_dump_call_begin("pipe_screen", "transfer_write");
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 1c16042ee5a..e2f981d0513 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -32,7 +32,7 @@ struct tgsi_token;
enum trace_shader_type {
TRACE_SHADER_FRAGMENT = 0,
TRACE_SHADER_VERTEX = 1,
- TRACE_SHADER_GEOMETRY = 2,
+ TRACE_SHADER_GEOMETRY = 2
};
struct trace_shader