-rw-r--r--  src/gallium/drivers/svga/include/VGPU10ShaderTokens.h | 2
-rw-r--r--  src/gallium/drivers/svga/include/svga3d_types.h | 18
-rw-r--r--  src/gallium/drivers/svga/meson.build | 2
-rw-r--r--  src/gallium/drivers/svga/svga_cmd.h | 29
-rw-r--r--  src/gallium/drivers/svga/svga_cmd_vgpu10.c | 158
-rw-r--r--  src/gallium/drivers/svga/svga_context.c | 30
-rw-r--r--  src/gallium/drivers/svga/svga_context.h | 263
-rw-r--r--  src/gallium/drivers/svga/svga_debug.h | 1
-rw-r--r--  src/gallium/drivers/svga/svga_draw.c | 156
-rw-r--r--  src/gallium/drivers/svga/svga_draw.h | 3
-rw-r--r--  src/gallium/drivers/svga/svga_draw_arrays.c | 19
-rw-r--r--  src/gallium/drivers/svga/svga_draw_elements.c | 24
-rw-r--r--  src/gallium/drivers/svga/svga_draw_private.h | 20
-rw-r--r--  src/gallium/drivers/svga/svga_format.c | 18
-rw-r--r--  src/gallium/drivers/svga/svga_link.c | 18
-rw-r--r--  src/gallium/drivers/svga/svga_link.h | 1
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_blend.c | 304
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_blit.c | 31
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_clear.c | 68
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_draw.c | 209
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_fs.c | 38
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_misc.c | 17
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_query.c | 78
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_rasterizer.c | 18
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_streamout.c | 388
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_ts.c | 219
-rw-r--r--  src/gallium/drivers/svga/svga_pipe_vs.c | 50
-rw-r--r--  src/gallium/drivers/svga/svga_resource_buffer.c | 6
-rw-r--r--  src/gallium/drivers/svga/svga_resource_buffer_upload.c | 5
-rw-r--r--  src/gallium/drivers/svga/svga_resource_texture.c | 33
-rw-r--r--  src/gallium/drivers/svga/svga_screen.c | 97
-rw-r--r--  src/gallium/drivers/svga/svga_screen.h | 3
-rw-r--r--  src/gallium/drivers/svga/svga_screen_cache.c | 16
-rw-r--r--  src/gallium/drivers/svga/svga_shader.c | 105
-rw-r--r--  src/gallium/drivers/svga/svga_shader.h | 168
-rw-r--r--  src/gallium/drivers/svga/svga_state.c | 96
-rw-r--r--  src/gallium/drivers/svga/svga_state.h | 19
-rw-r--r--  src/gallium/drivers/svga/svga_state_constants.c | 219
-rw-r--r--  src/gallium/drivers/svga/svga_state_framebuffer.c | 306
-rw-r--r--  src/gallium/drivers/svga/svga_state_fs.c | 23
-rw-r--r--  src/gallium/drivers/svga/svga_state_gs.c | 17
-rw-r--r--  src/gallium/drivers/svga/svga_state_need_swtnl.c | 6
-rw-r--r--  src/gallium/drivers/svga/svga_state_rss.c | 6
-rw-r--r--  src/gallium/drivers/svga/svga_state_sampler.c | 28
-rw-r--r--  src/gallium/drivers/svga/svga_state_tgsi_transform.c | 205
-rw-r--r--  src/gallium/drivers/svga/svga_state_ts.c | 392
-rw-r--r--  src/gallium/drivers/svga/svga_state_tss.c | 18
-rw-r--r--  src/gallium/drivers/svga/svga_state_vdecl.c | 4
-rw-r--r--  src/gallium/drivers/svga/svga_state_vs.c | 15
-rw-r--r--  src/gallium/drivers/svga/svga_streamout.h | 19
-rw-r--r--  src/gallium/drivers/svga/svga_surface.c | 10
-rw-r--r--  src/gallium/drivers/svga/svga_surface.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_swtnl_backend.c | 39
-rw-r--r--  src/gallium/drivers/svga/svga_swtnl_state.c | 38
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi.c | 20
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi.h | 2
-rw-r--r--  src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 5368
-rw-r--r--  src/gallium/drivers/svga/svga_winsys.h | 10
-rw-r--r--  src/gallium/winsys/svga/drm/vmw_context.c | 16
-rw-r--r--  src/gallium/winsys/svga/drm/vmw_shader.c | 53
-rw-r--r--  src/gallium/winsys/svga/drm/vmw_shader.h | 8
61 files changed, 8033 insertions, 1521 deletions
diff --git a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
index 77af6d39a5a..e23ee53ffb1 100644
--- a/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
+++ b/src/gallium/drivers/svga/include/VGPU10ShaderTokens.h
@@ -201,7 +201,7 @@ typedef enum {
VGPU10_OPCODE_DCL_GLOBAL_FLAGS = 106,
/* GL guest */
- VGPU10_OPCODE_IDIV = 107,
+ VGPU10_OPCODE_VMWARE = 107,
/* DX10.1 */
VGPU10_OPCODE_LOD = 108,
diff --git a/src/gallium/drivers/svga/include/svga3d_types.h b/src/gallium/drivers/svga/include/svga3d_types.h
index 48eafe72202..94262314e29 100644
--- a/src/gallium/drivers/svga/include/svga3d_types.h
+++ b/src/gallium/drivers/svga/include/svga3d_types.h
@@ -436,8 +436,9 @@ typedef uint32 SVGA3dSurfaceFlags;
* mob-backing to store all the samples.
*/
#define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32)
+#define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38)
-#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 33)
+#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 42)
/*
* Surface flags types:
@@ -464,7 +465,8 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
SVGA3D_SURFACE_VADECODE | \
- SVGA3D_SURFACE_MULTISAMPLE \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
)
#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \
@@ -480,7 +482,8 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
SVGA3D_SURFACE_VADECODE | \
- SVGA3D_SURFACE_MULTISAMPLE \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
)
#define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \
@@ -508,7 +511,8 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
SVGA3D_SURFACE_VADECODE | \
- SVGA3D_SURFACE_MULTISAMPLE \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
)
#define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \
@@ -527,7 +531,8 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_VOLUME | \
SVGA3D_SURFACE_1D | \
SVGA3D_SURFACE_SCREENTARGET | \
- SVGA3D_SURFACE_MOB_PITCH \
+ SVGA3D_SURFACE_MOB_PITCH | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
)
#define SVGA3D_SURFACE_DX_ONLY_MASK \
@@ -636,7 +641,8 @@ typedef uint64 SVGA3dSurfaceAllFlags;
SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \
SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \
SVGA3D_SURFACE_VADECODE | \
- SVGA3D_SURFACE_MULTISAMPLE \
+ SVGA3D_SURFACE_MULTISAMPLE | \
+ SVGA3D_SURFACE_DRAWINDIRECT_ARGS \
)
diff --git a/src/gallium/drivers/svga/meson.build b/src/gallium/drivers/svga/meson.build
index 368d0c7f342..8dcdadd6e1d 100644
--- a/src/gallium/drivers/svga/meson.build
+++ b/src/gallium/drivers/svga/meson.build
@@ -36,6 +36,7 @@ files_svga = files(
'svga_pipe_flush.c',
'svga_pipe_fs.c',
'svga_pipe_gs.c',
+ 'svga_pipe_ts.c',
'svga_pipe_misc.c',
'svga_pipe_query.c',
'svga_pipe_rasterizer.c',
@@ -56,6 +57,7 @@ files_svga = files(
'svga_state_framebuffer.c',
'svga_state_fs.c',
'svga_state_gs.c',
+ 'svga_state_ts.c',
'svga_state_need_swtnl.c',
'svga_state_rss.c',
'svga_state_sampler.c',
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
index f6cb4fc27c1..22a40cf05cb 100644
--- a/src/gallium/drivers/svga/svga_cmd.h
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -697,4 +697,33 @@ SVGA3D_vgpu10_ResolveCopy(struct svga_winsys_context *swc,
struct svga_winsys_surface *src,
const SVGA3dSurfaceFormat copyFormat);
+enum pipe_error
+SVGA3D_sm5_DrawIndexedInstancedIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ unsigned argOffset);
+
+enum pipe_error
+SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ unsigned argOffset);
+
+enum pipe_error
+SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc,
+ const uint32 threadGroupCount[3]);
+
+enum pipe_error
+SVGA3D_sm5_DispatchIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ uint32 argOffset);
+
+enum pipe_error
+SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc,
+ SVGA3dStreamOutputId soid,
+ uint32 numOutputStreamEntries,
+ uint32 numOutputStreamStrides,
+ uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+ struct svga_winsys_buffer *declBuf,
+ uint32 rasterizedStream,
+ uint32 sizeInBytes);
+
#endif /* __SVGA3D_H__ */
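
For reference, a minimal sketch of how one of the new SM5 entry points declared above could be driven from the driver side (illustrative only, not part of the patch; it assumes an svga context and the SVGA_RETRY helper that this series adds to svga_context.h, and the grid size is made up):

/* Hypothetical caller: launch an 8x4x1 grid of thread groups, flushing
 * the context and retrying once if the command buffer runs out of space.
 */
static void
example_launch_grid(struct svga_context *svga)
{
   const uint32 threadGroupCount[3] = { 8, 4, 1 };

   SVGA_RETRY(svga, SVGA3D_sm5_Dispatch(svga->swc, threadGroupCount));
}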
diff --git a/src/gallium/drivers/svga/svga_cmd_vgpu10.c b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
index 1ca050ecb7a..eb5a482d9ba 100644
--- a/src/gallium/drivers/svga/svga_cmd_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_cmd_vgpu10.c
@@ -1130,7 +1130,7 @@ SVGA3D_vgpu10_DefineStreamOutput(struct svga_winsys_context *swc,
memcpy(cmd->decl, decl,
sizeof(SVGA3dStreamOutputDeclarationEntry)
- * SVGA3D_MAX_STREAMOUT_DECLS);
+ * SVGA3D_MAX_DX10_STREAMOUT_DECLS);
cmd->rasterizedStream = 0;
swc->commit(swc);
@@ -1432,3 +1432,159 @@ SVGA3D_vgpu10_ResolveCopy(struct svga_winsys_context *swc,
return PIPE_OK;
}
+
+
+enum pipe_error
+SVGA3D_sm5_DrawIndexedInstancedIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ unsigned argOffset)
+{
+ SVGA3dCmdDXDrawIndexedInstancedIndirect *cmd =
+ SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT,
+ sizeof(SVGA3dCmdDXDrawIndexedInstancedIndirect),
+ 1); /* one relocation */
+ if (!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer,
+ SVGA_RELOC_READ);
+ cmd->byteOffsetForArgs = argOffset;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_sm5_DrawInstancedIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ unsigned argOffset)
+{
+ SVGA3dCmdDXDrawInstancedIndirect *cmd =
+ SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT,
+ sizeof(SVGA3dCmdDXDrawInstancedIndirect),
+ 1); /* one relocation */
+ if (!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer,
+ SVGA_RELOC_READ);
+ cmd->byteOffsetForArgs = argOffset;
+
+ swc->commit(swc);
+
+ return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_sm5_Dispatch(struct svga_winsys_context *swc,
+ const uint32 threadGroupCount[3])
+{
+ SVGA3dCmdDXDispatch *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_DX_DISPATCH,
+ sizeof(SVGA3dCmdDXDispatch),
+ 0);
+ if (!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ cmd->threadGroupCountX = threadGroupCount[0];
+ cmd->threadGroupCountY = threadGroupCount[1];
+ cmd->threadGroupCountZ = threadGroupCount[2];
+
+ swc->commit(swc);
+ return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_sm5_DispatchIndirect(struct svga_winsys_context *swc,
+ struct svga_winsys_surface *argBuffer,
+ uint32 argOffset)
+{
+ SVGA3dCmdDXDispatchIndirect *cmd;
+
+ cmd = SVGA3D_FIFOReserve(swc,
+ SVGA_3D_CMD_DX_DISPATCH_INDIRECT,
+ sizeof(SVGA3dCmdDXDispatchIndirect),
+ 1);
+ if (!cmd)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ swc->surface_relocation(swc, &cmd->argsBufferSid, NULL, argBuffer,
+ SVGA_RELOC_READ);
+ cmd->byteOffsetForArgs = argOffset;
+
+ swc->commit(swc);
+ return PIPE_OK;
+}
+
+
+/**
+ * We don't want any flush between DefineStreamOutputWithMob and
+ * BindStreamOutput because it would leave partial state in the command
+ * buffer. This function makes sure there is enough room for both
+ * commands before issuing them.
+ */
+
+enum pipe_error
+SVGA3D_sm5_DefineAndBindStreamOutput(struct svga_winsys_context *swc,
+ SVGA3dStreamOutputId soid,
+ uint32 numOutputStreamEntries,
+ uint32 numOutputStreamStrides,
+ uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS],
+ struct svga_winsys_buffer *declBuf,
+ uint32 rasterizedStream,
+ uint32 sizeInBytes)
+{
+ unsigned i;
+ SVGA3dCmdHeader *header;
+ SVGA3dCmdDXDefineStreamOutputWithMob *dcmd;
+ SVGA3dCmdDXBindStreamOutput *bcmd;
+
+ unsigned totalSize = 2 * sizeof(*header) +
+ sizeof(*dcmd) + sizeof(*bcmd);
+
+ /* Make sure there is room for both commands */
+ header = swc->reserve(swc, totalSize, 2);
+ if (!header)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /* DXDefineStreamOutputWithMob command */
+ header->id = SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT_WITH_MOB;
+ header->size = sizeof(*dcmd);
+ dcmd = (SVGA3dCmdDXDefineStreamOutputWithMob *)(header + 1);
+ dcmd->soid= soid;
+ dcmd->numOutputStreamEntries = numOutputStreamEntries;
+ dcmd->numOutputStreamStrides = numOutputStreamStrides;
+ dcmd->rasterizedStream = rasterizedStream;
+
+ for (i = 0; i < ARRAY_SIZE(dcmd->streamOutputStrideInBytes); i++)
+ dcmd->streamOutputStrideInBytes[i] = streamOutputStrideInBytes[i];
+
+
+ /* DXBindStreamOutput command */
+ header = (SVGA3dCmdHeader *)(dcmd + 1);
+
+ header->id = SVGA_3D_CMD_DX_BIND_STREAMOUTPUT;
+ header->size = sizeof(*bcmd);
+ bcmd = (SVGA3dCmdDXBindStreamOutput *)(header + 1);
+
+ bcmd->soid = soid;
+ bcmd->offsetInBytes = 0;
+ swc->mob_relocation(swc, &bcmd->mobid,
+ &bcmd->offsetInBytes, declBuf, 0,
+ SVGA_RELOC_WRITE);
+
+ bcmd->sizeInBytes = sizeInBytes;
+ bcmd->offsetInBytes = 0;
+
+
+ swc->commit(swc);
+ return PIPE_OK;
+}
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index cdc222e2438..4ef99efe989 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -44,6 +44,7 @@
#include "svga_debug.h"
#include "svga_state.h"
#include "svga_winsys.h"
+#include "svga_streamout.h"
#define CONST0_UPLOAD_DEFAULT_SIZE 65536
@@ -79,6 +80,9 @@ svga_destroy(struct pipe_context *pipe)
pipe->delete_blend_state(pipe, svga->noop_blend);
+ /* destroy stream output statistics queries */
+ svga_destroy_stream_output_queries(svga);
+
/* free query gb object */
if (svga->gb_query) {
pipe->destroy_query(pipe, NULL);
@@ -91,6 +95,7 @@ svga_destroy(struct pipe_context *pipe)
svga_cleanup_framebuffer(svga);
svga_cleanup_tss_binding(svga);
svga_cleanup_vertex_state(svga);
+ svga_cleanup_tcs_state(svga);
svga_destroy_swtnl(svga);
svga_hwtnl_destroy(svga->hwtnl);
@@ -174,12 +179,14 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga_init_fs_functions(svga);
svga_init_vs_functions(svga);
svga_init_gs_functions(svga);
+ svga_init_ts_functions(svga);
svga_init_vertex_functions(svga);
svga_init_constbuffer_functions(svga);
svga_init_query_functions(svga);
svga_init_surface_functions(svga);
svga_init_stream_output_functions(svga);
svga_init_clear_functions(svga);
+ svga_init_tracked_state(svga);
/* init misc state */
svga->curr.sample_mask = ~0;
@@ -250,6 +257,7 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
memset(&svga->state.hw_clear.framebuffer, 0x0,
sizeof(svga->state.hw_clear.framebuffer));
+ memset(&svga->state.hw_clear.rtv, 0, sizeof(svga->state.hw_clear.rtv));
svga->state.hw_clear.num_rendertargets = 0;
svga->state.hw_clear.dsv = NULL;
@@ -269,6 +277,8 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga->state.hw_draw.vs = NULL;
svga->state.hw_draw.gs = NULL;
svga->state.hw_draw.fs = NULL;
+ svga->state.hw_draw.tcs = NULL;
+ svga->state.hw_draw.tes = NULL;
/* Initialize the currently bound buffer resources */
memset(svga->state.hw_draw.constbuf, 0,
@@ -303,10 +313,16 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
svga->noop_blend = svga->pipe.create_blend_state(&svga->pipe, &noop_tmpl);
}
- svga->dirty = ~0;
+ svga->dirty = SVGA_NEW_ALL;
svga->pred.query_id = SVGA3D_INVALID_ID;
svga->disable_rasterizer = FALSE;
+ /**
+ * Create stream output statistics queries used in the workaround for auto
+ * draw with stream instancing.
+ */
+ svga_create_stream_output_queries(svga);
+
goto done;
cleanup:
@@ -398,6 +414,11 @@ svga_context_flush(struct svga_context *svga,
svga->rebind.flags.fs = TRUE;
svga->rebind.flags.gs = TRUE;
+ if (svga_have_sm5(svga)) {
+ svga->rebind.flags.tcs = TRUE;
+ svga->rebind.flags.tes = TRUE;
+ }
+
if (svga_need_to_rebind_resources(svga)) {
svga->rebind.flags.query = TRUE;
}
@@ -447,12 +468,7 @@ svga_hwtnl_flush_retry(struct svga_context *svga)
{
enum pipe_error ret = PIPE_OK;
- ret = svga_hwtnl_flush(svga->hwtnl);
- if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
- svga_context_flush(svga, NULL);
- ret = svga_hwtnl_flush(svga->hwtnl);
- }
-
+ SVGA_RETRY_OOM(svga, ret, svga_hwtnl_flush(svga->hwtnl));
assert(ret == PIPE_OK);
}
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index eef8b88f594..c0c315119f6 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -43,7 +43,7 @@
#include "svga_winsys.h"
#include "svga_hw_reg.h"
#include "svga3d_shaderdefs.h"
-
+#include "svga_debug.h"
/** Non-GPU queries for gallium HUD */
enum svga_hud {
@@ -56,6 +56,7 @@ enum svga_hud {
SVGA_QUERY_NUM_BUFFERS_MAPPED,
SVGA_QUERY_NUM_TEXTURES_MAPPED,
SVGA_QUERY_NUM_BYTES_UPLOADED,
+ SVGA_QUERY_NUM_COMMAND_BUFFERS,
SVGA_QUERY_COMMAND_BUFFER_SIZE,
SVGA_QUERY_FLUSH_TIME,
SVGA_QUERY_SURFACE_WRITE_FLUSHES,
@@ -64,6 +65,8 @@ enum svga_hud {
SVGA_QUERY_NUM_BUFFER_UPLOADS,
SVGA_QUERY_NUM_CONST_BUF_UPDATES,
SVGA_QUERY_NUM_CONST_UPDATES,
+ SVGA_QUERY_NUM_SHADER_RELOCATIONS,
+ SVGA_QUERY_NUM_SURFACE_RELOCATIONS,
/* running total counters */
SVGA_QUERY_MEMORY_USED,
@@ -74,6 +77,7 @@ enum svga_hud {
SVGA_QUERY_NUM_GENERATE_MIPMAP,
SVGA_QUERY_NUM_FAILED_ALLOCATIONS,
SVGA_QUERY_NUM_COMMANDS_PER_DRAW,
+ SVGA_QUERY_SHADER_MEM_USED,
/*SVGA_QUERY_MAX has to be last because it is size of an array*/
SVGA_QUERY_MAX
@@ -109,6 +113,8 @@ struct svga_blend_state {
unsigned alpha_to_coverage:1;
unsigned alpha_to_one:1;
unsigned blend_color_alpha:1; /**< set blend color to alpha value */
+ unsigned logicop_enabled:1;
+ unsigned logicop_mode:5;
/** Per-render target state */
struct {
@@ -269,6 +275,11 @@ struct svga_state
struct svga_vertex_shader *vs;
struct svga_geometry_shader *user_gs; /* user-specified GS */
struct svga_geometry_shader *gs; /* derived GS */
+ /* derived tessellation control shader */
+ struct svga_tcs_shader *tcs;
+ /* derived tessellation evaluation shader */
+ struct svga_tes_shader *tes;
+ struct svga_compute_shader *cs;
struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
/** Constant buffers for each shader.
@@ -286,11 +297,11 @@ struct svga_state
int nr_fbs;
struct pipe_poly_stipple poly_stipple;
- struct pipe_scissor_state scissor;
+ struct pipe_scissor_state scissor[SVGA3D_DX_MAX_VIEWPORTS];
struct pipe_blend_color blend_color;
struct pipe_stencil_ref stencil_ref;
struct pipe_clip_state clip;
- struct pipe_viewport_state viewport;
+ struct pipe_viewport_state viewport[SVGA3D_DX_MAX_VIEWPORTS];
unsigned num_samplers[PIPE_SHADER_TYPES];
unsigned num_sampler_views[PIPE_SHADER_TYPES];
@@ -303,6 +314,14 @@ struct svga_state
} tex_flags;
unsigned sample_mask;
+ unsigned vertices_per_patch;
+ float default_tesslevels[6]; /* tessellation (outer[4] + inner[2]) levels */
+ struct {
+ /* Determine the layout of the grid (in block units) to be used. */
+ unsigned size[3];
+      /* If DispatchIndirect is used, this will have the grid size info */
+ struct pipe_resource *indirect;
+ } grid_info;
};
struct svga_prescale {
@@ -311,21 +330,27 @@ struct svga_prescale {
boolean enabled;
};
+struct svga_depthrange {
+ float zmin;
+ float zmax;
+};
/* Updated by calling svga_update_state( SVGA_STATE_HW_CLEAR )
*/
struct svga_hw_clear_state
{
- SVGA3dRect viewport;
-
- struct {
- float zmin, zmax;
- } depthrange;
-
struct pipe_framebuffer_state framebuffer;
- struct svga_prescale prescale;
+
+ /* VGPU9 only */
+ SVGA3dRect viewport;
+ struct svga_depthrange depthrange;
/* VGPU10 state */
+ SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS];
+ struct svga_prescale prescale[SVGA3D_DX_MAX_VIEWPORTS];
+ struct pipe_scissor_state scissors[SVGA3D_DX_MAX_VIEWPORTS];
+ unsigned num_prescale;
+
unsigned num_rendertargets;
struct pipe_surface *rtv[SVGA3D_MAX_RENDER_TARGETS];
struct pipe_surface *dsv;
@@ -361,6 +386,9 @@ struct svga_hw_draw_state
struct svga_shader_variant *fs;
struct svga_shader_variant *vs;
struct svga_shader_variant *gs;
+ struct svga_shader_variant *tcs;
+ struct svga_shader_variant *tes;
+ struct svga_shader_variant *cs;
/** Currently bound constant buffer, per shader stage */
struct pipe_resource *constbuf[PIPE_SHADER_TYPES];
@@ -495,7 +523,7 @@ struct svga_context
struct util_bitmask *query_id_bm;
struct {
- unsigned dirty[SVGA_STATE_MAX];
+ uint64_t dirty[SVGA_STATE_MAX];
/** bitmasks of which const buffers are changed */
unsigned dirty_constbufs[PIPE_SHADER_TYPES];
@@ -508,7 +536,7 @@ struct svga_context
} state;
struct svga_state curr; /* state from the gallium frontend */
- unsigned dirty; /* statechanges since last update_state() */
+ uint64_t dirty; /* statechanges since last update_state() */
union {
struct {
@@ -518,6 +546,9 @@ struct svga_context
unsigned vs:1;
unsigned fs:1;
unsigned gs:1;
+ unsigned tcs:1;
+ unsigned tes:1;
+ unsigned cs:1;
unsigned query:1;
} flags;
unsigned val;
@@ -531,7 +562,10 @@ struct svga_context
struct util_bitmask *gb_query_alloc_mask; /**< gb query object allocation mask */
struct svga_qmem_alloc_entry *gb_query_map[SVGA_QUERY_MAX];
/**< query mem block mapping */
- struct svga_query *sq[SVGA_QUERY_MAX]; /**< queries currently in progress */
+ struct svga_query *sq[SVGA_QUERY_MAX+12]; /**< queries currently in progress */
+ /* The last 12 entries are for streamout
+ * queries for stream 0..3
+ */
/** List of buffers with queued transfers */
struct list_head dirty_buffers;
@@ -545,6 +579,7 @@ struct svga_context
uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
uint64_t num_buffers_mapped; /**< SVGA_QUERY_NUM_BUFFERS_MAPPED */
uint64_t num_textures_mapped; /**< SVGA_QUERY_NUM_TEXTURES_MAPPED */
+ uint64_t num_command_buffers; /**< SVGA_QUERY_NUM_COMMAND_BUFFERS */
uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */
uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */
uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */
@@ -566,16 +601,28 @@ struct svga_context
uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */
+ uint64_t shader_mem_used; /**< SVGA_QUERY_SHADER_MEM_USED */
boolean uses_time; /**< os_time_get() calls needed? */
} hud;
/** The currently bound stream output targets */
+ boolean in_streamout; /* Set if streamout is active */
unsigned num_so_targets;
struct svga_winsys_surface *so_surfaces[SVGA3D_DX_MAX_SOTARGETS];
struct pipe_stream_output_target *so_targets[SVGA3D_DX_MAX_SOTARGETS];
struct svga_stream_output *current_so;
+ /**
+ * The following states are used in the workaround for auto draw with
+ * stream instancing.
+ */
+
+ /* Last bound SO targets that can be used to get vertex count */
+ struct pipe_stream_output_target *vcount_so_targets[SVGA3D_DX_MAX_SOTARGETS];
+ unsigned vcount_buffer_stream; /* SO buffer to stream index mask */
+ struct pipe_query *so_queries[4]; /* SO stat queries for each stream */
+
/** A blend state with blending disabled, for falling back to when blending
* is illegal (e.g. an integer texture is bound)
*/
@@ -601,41 +648,58 @@ struct svga_context
boolean render_condition;
boolean disable_rasterizer; /* Set if to disable rasterization */
+
+ struct {
+ struct svga_tcs_shader *passthrough_tcs;
+ struct svga_vertex_shader *vs;
+ struct svga_tes_shader *tes;
+ unsigned vertices_per_patch;
+ boolean passthrough;
+ } tcs;
+
};
/* A flag for each frontend state object:
*/
-#define SVGA_NEW_BLEND 0x1
-#define SVGA_NEW_DEPTH_STENCIL_ALPHA 0x2
-#define SVGA_NEW_RAST 0x4
-#define SVGA_NEW_SAMPLER 0x8
-#define SVGA_NEW_TEXTURE 0x10
-#define SVGA_NEW_VBUFFER 0x20
-#define SVGA_NEW_VELEMENT 0x40
-#define SVGA_NEW_FS 0x80
-#define SVGA_NEW_VS 0x100
-#define SVGA_NEW_FS_CONST_BUFFER 0x200
-#define SVGA_NEW_VS_CONST_BUFFER 0x400
-#define SVGA_NEW_FRAME_BUFFER 0x800
-#define SVGA_NEW_STIPPLE 0x1000
-#define SVGA_NEW_SCISSOR 0x2000
-#define SVGA_NEW_BLEND_COLOR 0x4000
-#define SVGA_NEW_CLIP 0x8000
-#define SVGA_NEW_VIEWPORT 0x10000
-#define SVGA_NEW_PRESCALE 0x20000
-#define SVGA_NEW_REDUCED_PRIMITIVE 0x40000
-#define SVGA_NEW_TEXTURE_BINDING 0x80000
-#define SVGA_NEW_NEED_PIPELINE 0x100000
-#define SVGA_NEW_NEED_SWVFETCH 0x200000
-#define SVGA_NEW_NEED_SWTNL 0x400000
-#define SVGA_NEW_FS_VARIANT 0x800000
-#define SVGA_NEW_VS_VARIANT 0x1000000
-#define SVGA_NEW_TEXTURE_FLAGS 0x4000000
-#define SVGA_NEW_STENCIL_REF 0x8000000
-#define SVGA_NEW_GS 0x10000000
-#define SVGA_NEW_GS_CONST_BUFFER 0x20000000
-#define SVGA_NEW_GS_VARIANT 0x40000000
-#define SVGA_NEW_TEXTURE_CONSTS 0x80000000
+#define SVGA_NEW_BLEND ((uint64_t) 0x1)
+#define SVGA_NEW_DEPTH_STENCIL_ALPHA ((uint64_t) 0x2)
+#define SVGA_NEW_RAST ((uint64_t) 0x4)
+#define SVGA_NEW_SAMPLER ((uint64_t) 0x8)
+#define SVGA_NEW_TEXTURE ((uint64_t) 0x10)
+#define SVGA_NEW_VBUFFER ((uint64_t) 0x20)
+#define SVGA_NEW_VELEMENT ((uint64_t) 0x40)
+#define SVGA_NEW_FS ((uint64_t) 0x80)
+#define SVGA_NEW_VS ((uint64_t) 0x100)
+#define SVGA_NEW_FS_CONST_BUFFER ((uint64_t) 0x200)
+#define SVGA_NEW_VS_CONST_BUFFER ((uint64_t) 0x400)
+#define SVGA_NEW_FRAME_BUFFER ((uint64_t) 0x800)
+#define SVGA_NEW_STIPPLE ((uint64_t) 0x1000)
+#define SVGA_NEW_SCISSOR ((uint64_t) 0x2000)
+#define SVGA_NEW_BLEND_COLOR ((uint64_t) 0x4000)
+#define SVGA_NEW_CLIP ((uint64_t) 0x8000)
+#define SVGA_NEW_VIEWPORT ((uint64_t) 0x10000)
+#define SVGA_NEW_PRESCALE ((uint64_t) 0x20000)
+#define SVGA_NEW_REDUCED_PRIMITIVE ((uint64_t) 0x40000)
+#define SVGA_NEW_TEXTURE_BINDING ((uint64_t) 0x80000)
+#define SVGA_NEW_NEED_PIPELINE ((uint64_t) 0x100000)
+#define SVGA_NEW_NEED_SWVFETCH ((uint64_t) 0x200000)
+#define SVGA_NEW_NEED_SWTNL ((uint64_t) 0x400000)
+#define SVGA_NEW_FS_VARIANT ((uint64_t) 0x800000)
+#define SVGA_NEW_VS_VARIANT ((uint64_t) 0x1000000)
+#define SVGA_NEW_TEXTURE_FLAGS ((uint64_t) 0x4000000)
+#define SVGA_NEW_STENCIL_REF ((uint64_t) 0x8000000)
+#define SVGA_NEW_GS ((uint64_t) 0x10000000)
+#define SVGA_NEW_GS_CONST_BUFFER ((uint64_t) 0x20000000)
+#define SVGA_NEW_GS_VARIANT ((uint64_t) 0x40000000)
+#define SVGA_NEW_TEXTURE_CONSTS ((uint64_t) 0x80000000)
+#define SVGA_NEW_TCS ((uint64_t) 0x100000000)
+#define SVGA_NEW_TES ((uint64_t) 0x200000000)
+#define SVGA_NEW_TCS_VARIANT ((uint64_t) 0x400000000)
+#define SVGA_NEW_TES_VARIANT ((uint64_t) 0x800000000)
+#define SVGA_NEW_TCS_CONST_BUFFER ((uint64_t) 0x1000000000)
+#define SVGA_NEW_TES_CONST_BUFFER ((uint64_t) 0x2000000000)
+#define SVGA_NEW_TCS_PARAM ((uint64_t) 0x4000000000)
+#define SVGA_NEW_ALL ((uint64_t) 0xFFFFFFFFFFFFFFFF)
void svga_init_state_functions( struct svga_context *svga );
@@ -648,9 +712,11 @@ void svga_init_depth_stencil_functions( struct svga_context *svga );
void svga_init_misc_functions( struct svga_context *svga );
void svga_init_rasterizer_functions( struct svga_context *svga );
void svga_init_sampler_functions( struct svga_context *svga );
+void svga_init_cs_functions( struct svga_context *svga );
void svga_init_fs_functions( struct svga_context *svga );
void svga_init_vs_functions( struct svga_context *svga );
void svga_init_gs_functions( struct svga_context *svga );
+void svga_init_ts_functions( struct svga_context *svga );
void svga_init_vertex_functions( struct svga_context *svga );
void svga_init_constbuffer_functions( struct svga_context *svga );
void svga_init_draw_functions( struct svga_context *svga );
@@ -663,6 +729,7 @@ void svga_cleanup_vertex_state( struct svga_context *svga );
void svga_cleanup_sampler_state( struct svga_context *svga );
void svga_cleanup_tss_binding( struct svga_context *svga );
void svga_cleanup_framebuffer( struct svga_context *svga );
+void svga_cleanup_tcs_state( struct svga_context *svga );
void svga_context_flush( struct svga_context *svga,
struct pipe_fence_handle **pfence );
@@ -724,6 +791,12 @@ svga_have_sm4_1(const struct svga_context *svga)
}
static inline boolean
+svga_have_sm5(const struct svga_context *svga)
+{
+ return svga_screen(svga->pipe.screen)->sws->have_sm5;
+}
+
+static inline boolean
svga_need_to_rebind_resources(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->need_to_rebind_resources;
@@ -745,5 +818,107 @@ svga_get_time(struct svga_context *svga)
return svga->hud.uses_time ? os_time_get() : 0;
}
+/*
+ * The SVGA_TRY_XX family of macros can be used to optionally replace a
+ * function call with an error value, the purpose is to trigger and test
+ * retry path handling.
+ */
+#ifdef DEBUG
+
+/*
+ * Optionally replace a function call with a PIPE_ERROR_OUT_OF_MEMORY
+ * return value
+ */
+#define SVGA_TRY(_func) \
+ ((SVGA_DEBUG & DEBUG_RETRY) ? PIPE_ERROR_OUT_OF_MEMORY : (_func))
+
+/* Optionally replace a function call with a NULL return value */
+#define SVGA_TRY_PTR(_func) \
+ ((SVGA_DEBUG & DEBUG_RETRY) ? NULL : (_func))
+
+/*
+ * Optionally replace a function call with a NULL return value, and set
+ * the _retry parameter to TRUE.
+ */
+#define SVGA_TRY_MAP(_func, _retry) \
+ ((SVGA_DEBUG & DEBUG_RETRY) ? (_retry) = TRUE, NULL : (_func))
+#else
+
+#define SVGA_TRY(_func) (_func)
+
+#define SVGA_TRY_PTR(_func) (_func)
+
+#define SVGA_TRY_MAP(_func, _retry) (_func)
+#endif
+
+/**
+ * Enter retry processing after hitting out-of-command space
+ */
+static inline void
+svga_retry_enter(struct svga_context *svga)
+{
+ /* We shouldn't nest retries, but currently we do. */
+ if ((SVGA_DEBUG & DEBUG_RETRY) && svga->swc->in_retry) {
+ debug_printf("WARNING: Recursive retry. Level: %u.\n",
+ svga->swc->in_retry);
+ }
+ svga->swc->in_retry++;
+}
+
+/**
+ * Exit retry processing after hitting out-of-command space
+ */
+static inline void
+svga_retry_exit(struct svga_context *svga)
+{
+ assert(svga->swc->in_retry > 0);
+ svga->swc->in_retry--;
+}
+
+/**
+ * Perform a function call, and on failure flush the context and retry,
+ * asserting that the retry succeeded. On return, the boolean argument
+ * _retried indicates whether the function call was retried or not.
+ */
+#define SVGA_RETRY_CHECK(_svga, _func, _retried) \
+ do { \
+ enum pipe_error ret; \
+ \
+ ret = SVGA_TRY(_func); \
+ (_retried) = (ret != PIPE_OK); \
+ if (_retried) { \
+ svga_retry_enter(_svga); \
+ svga_context_flush(_svga, NULL); \
+ ret = (_func); \
+ assert(ret == PIPE_OK); \
+ svga_retry_exit(_svga); \
+ } \
+ } while(0)
+
+/**
+ * Perform a function call, and on failure flush the context and retry,
+ * asserting that the retry succeeded.
+ */
+#define SVGA_RETRY(_svga, _func) \
+ do { \
+ UNUSED boolean retried; \
+ \
+ SVGA_RETRY_CHECK(_svga, _func, retried); \
+ } while(0)
+
+/**
+ * Perform a function call, and on out-of-memory, flush the context and
+ * retry. The retry return value is stored in _ret for reuse.
+ */
+#define SVGA_RETRY_OOM(_svga, _ret, _func) \
+ do { \
+ (_ret) = SVGA_TRY(_func); \
+ if ((_ret) == PIPE_ERROR_OUT_OF_MEMORY) { \
+ svga_retry_enter(_svga); \
+ svga_context_flush(_svga, NULL); \
+ (_ret) = (_func); \
+ svga_retry_exit(_svga); \
+ } \
+ } while (0);
#endif
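
A short usage sketch for the retry helpers above (illustrative only; svga_emit_something() is a made-up stand-in for any SVGA3D_* command-emission call):

/* Fire-and-assert: emit, and on failure flush the context and emit
 * again, asserting that the second attempt succeeds. */
SVGA_RETRY(svga, svga_emit_something(svga->swc));

/* Keep the return value, and retry only on out-of-command-buffer space. */
enum pipe_error ret;
SVGA_RETRY_OOM(svga, ret, svga_emit_something(svga->swc));
assert(ret == PIPE_OK);

/* In DEBUG builds with the DEBUG_RETRY flag set, SVGA_TRY forces the
 * first attempt to fail so the flush-and-retry path gets exercised. */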
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
index 3686cc6d9cc..cdad858b045 100644
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -46,6 +46,7 @@
#define DEBUG_CACHE 0x8000
#define DEBUG_STREAMOUT 0x10000
#define DEBUG_SAMPLERS 0x20000
+#define DEBUG_RETRY 0x100000
#ifdef DEBUG
extern int SVGA_DEBUG;
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index e0e55f129b8..f8db818b3d0 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -349,7 +349,7 @@ validate_sampler_resources(struct svga_context *svga)
assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
unsigned count = svga->curr.num_sampler_views[shader];
unsigned i;
struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
@@ -379,7 +379,8 @@ validate_sampler_resources(struct svga_context *svga)
if (shader == PIPE_SHADER_FRAGMENT &&
svga->curr.rast->templ.poly_stipple_enable) {
- const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ const unsigned unit =
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
struct svga_pipe_sampler_view *sv =
svga->polygon_stipple.sampler_view;
@@ -415,7 +416,7 @@ validate_constant_buffers(struct svga_context *svga)
assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_COMPUTE; shader++) {
enum pipe_error ret;
struct svga_buffer *buffer;
struct svga_winsys_surface *handle;
@@ -482,6 +483,8 @@ last_command_was_draw(const struct svga_context *svga)
case SVGA_3D_CMD_DX_DRAW_INSTANCED:
case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED:
case SVGA_3D_CMD_DX_DRAW_AUTO:
+ case SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT:
+ case SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT:
return true;
default:
return false;
@@ -511,17 +514,51 @@ vertex_buffers_equal(unsigned count,
* Prepare the vertex buffers for a drawing command.
*/
static enum pipe_error
-validate_vertex_buffers(struct svga_hwtnl *hwtnl)
+validate_vertex_buffers(struct svga_hwtnl *hwtnl,
+ const struct pipe_stream_output_target *so_vertex_count)
{
struct svga_context *svga = hwtnl->svga;
struct pipe_resource *vbuffers[SVGA3D_INPUTREG_MAX];
struct svga_winsys_surface *vbuffer_handles[SVGA3D_INPUTREG_MAX];
- const unsigned vbuf_count = hwtnl->cmd.vbuf_count;
+ struct svga_winsys_surface *so_vertex_count_handle;
+ const unsigned vbuf_count = so_vertex_count ? 1 : hwtnl->cmd.vbuf_count;
int last_vbuf = -1;
unsigned i;
assert(svga_have_vgpu10(svga));
+ /* Get handle for each referenced vertex buffer, unless we're using a
+ * stream-out buffer to specify the drawing information (DrawAuto).
+ */
+ if (so_vertex_count) {
+ i = 0;
+ }
+ else {
+ for (i = 0; i < vbuf_count; i++) {
+ struct svga_buffer *sbuf =
+ svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource);
+
+ if (sbuf) {
+ vbuffer_handles[i] = svga_buffer_handle(svga, &sbuf->b.b,
+ PIPE_BIND_VERTEX_BUFFER);
+ assert(sbuf->key.flags & SVGA3D_SURFACE_BIND_VERTEX_BUFFER);
+ if (vbuffer_handles[i] == NULL)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ vbuffers[i] = &sbuf->b.b;
+ last_vbuf = i;
+ }
+ else {
+ vbuffers[i] = NULL;
+ vbuffer_handles[i] = NULL;
+ }
+ }
+ }
+
+ for (; i < svga->state.hw_draw.num_vbuffers; i++) {
+ vbuffers[i] = NULL;
+ vbuffer_handles[i] = NULL;
+ }
+
/* Get handle for each referenced vertex buffer */
for (i = 0; i < vbuf_count; i++) {
struct svga_buffer *sbuf =
@@ -558,14 +595,38 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl)
svga->state.hw_draw.layout_id = hwtnl->cmd.vdecl_layout_id;
}
+ /* Get handle for the stream out buffer */
+ if (so_vertex_count) {
+ so_vertex_count_handle = svga_buffer_handle(svga,
+ so_vertex_count->buffer,
+ (PIPE_BIND_VERTEX_BUFFER |
+ PIPE_BIND_STREAM_OUTPUT));
+ if (!so_vertex_count_handle)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ else {
+ so_vertex_count_handle = NULL;
+ }
+
/* setup vertex buffers */
{
SVGA3dVertexBuffer vbuffer_attrs[PIPE_MAX_ATTRIBS];
- for (i = 0; i < vbuf_count; i++) {
- vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
- vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
- vbuffer_attrs[i].sid = 0;
+ if (so_vertex_count) {
+ /* Set IA slot0 input buffer to the SO buffer */
+ assert(vbuf_count == 1);
+ vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride;
+ vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset;
+ vbuffer_attrs[0].sid = 0;
+ vbuffers[0] = so_vertex_count->buffer;
+ vbuffer_handles[0] = so_vertex_count_handle;
+ }
+ else {
+ for (i = 0; i < vbuf_count; i++) {
+ vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride;
+ vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset;
+ vbuffer_attrs[i].sid = 0;
+ }
}
/* If any of the vertex buffer state has changed, issue
@@ -736,10 +797,14 @@ static enum pipe_error
draw_vgpu10(struct svga_hwtnl *hwtnl,
const SVGA3dPrimitiveRange *range,
unsigned vcount,
+ unsigned min_index, unsigned max_index,
struct pipe_resource *ib,
- unsigned start_instance, unsigned instance_count)
+ unsigned start_instance, unsigned instance_count,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_stream_output_target *so_vertex_count)
{
struct svga_context *svga = hwtnl->svga;
+ struct svga_winsys_surface *indirect_handle;
enum pipe_error ret;
assert(svga_have_vgpu10(svga));
@@ -779,7 +844,7 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
if (ret != PIPE_OK)
return ret;
- ret = validate_vertex_buffers(hwtnl);
+ ret = validate_vertex_buffers(hwtnl, so_vertex_count);
if (ret != PIPE_OK)
return ret;
@@ -789,6 +854,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
return ret;
}
+ if (indirect) {
+ indirect_handle = svga_buffer_handle(svga, indirect->buffer,
+ PIPE_BIND_COMMAND_ARGS_BUFFER);
+ if (!indirect_handle)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ else {
+ indirect_handle = NULL;
+ }
+
/* Set primitive type (line, tri, etc) */
if (svga->state.hw_draw.topology != range->primType) {
ret = SVGA3D_vgpu10_SetTopology(svga->swc, range->primType);
@@ -800,15 +875,18 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
if (ib) {
/* indexed drawing */
- if (instance_count > 1) {
+ if (indirect) {
+ ret = SVGA3D_sm5_DrawIndexedInstancedIndirect(svga->swc,
+ indirect_handle,
+ indirect->offset);
+ }
+ else if (instance_count > 1) {
ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
vcount,
instance_count,
0, /* startIndexLocation */
range->indexBias,
start_instance);
- if (ret != PIPE_OK)
- return ret;
}
else {
/* non-instanced drawing */
@@ -816,8 +894,9 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
vcount,
0, /* startIndexLocation */
range->indexBias);
- if (ret != PIPE_OK)
- return ret;
+ }
+ if (ret != PIPE_OK) {
+ return ret;
}
}
else {
@@ -835,22 +914,30 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
assert(svga->state.hw_draw.ib == NULL);
- if (instance_count > 1) {
+ if (so_vertex_count) {
+ /* Stream-output drawing */
+ ret = SVGA3D_vgpu10_DrawAuto(svga->swc);
+ }
+ else if (indirect) {
+ ret = SVGA3D_sm5_DrawInstancedIndirect(svga->swc,
+ indirect_handle,
+ indirect->offset);
+ }
+ else if (instance_count > 1) {
ret = SVGA3D_vgpu10_DrawInstanced(svga->swc,
vcount,
instance_count,
range->indexBias,
start_instance);
- if (ret != PIPE_OK)
- return ret;
}
else {
/* non-instanced */
ret = SVGA3D_vgpu10_Draw(svga->swc,
vcount,
range->indexBias);
- if (ret != PIPE_OK)
- return ret;
+ }
+ if (ret != PIPE_OK) {
+ return ret;
}
}
@@ -1044,14 +1131,20 @@ check_draw_params(struct svga_hwtnl *hwtnl,
/**
* All drawing filters down into this function, either directly
* on the hardware path or after doing software vertex processing.
+ * \param indirect if non-null, get the vertex count, first vertex, etc.
+ * from a buffer.
+ * \param so_vertex_count if non-null, get the vertex count from a
+ * stream-output target.
*/
enum pipe_error
svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
- const SVGA3dPrimitiveRange * range,
+ const SVGA3dPrimitiveRange *range,
unsigned vcount,
- unsigned min_index,
- unsigned max_index, struct pipe_resource *ib,
- unsigned start_instance, unsigned instance_count)
+ unsigned min_index, unsigned max_index,
+ struct pipe_resource *ib,
+ unsigned start_instance, unsigned instance_count,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_stream_output_target *so_vertex_count)
{
enum pipe_error ret = PIPE_OK;
@@ -1059,17 +1152,14 @@ svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
if (svga_have_vgpu10(hwtnl->svga)) {
/* draw immediately */
- ret = draw_vgpu10(hwtnl, range, vcount, ib,
- start_instance, instance_count);
- if (ret != PIPE_OK) {
- svga_context_flush(hwtnl->svga, NULL);
- ret = draw_vgpu10(hwtnl, range, vcount, ib,
- start_instance, instance_count);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(hwtnl->svga, draw_vgpu10(hwtnl, range, vcount, min_index,
+ max_index, ib, start_instance,
+ instance_count, indirect,
+ so_vertex_count));
}
else {
/* batch up drawing commands */
+ assert(indirect == NULL);
#ifdef DEBUG
check_draw_params(hwtnl, range, min_index, max_index, ib);
assert(start_instance == 0);
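
A hypothetical call site for the extended svga_hwtnl_prim() interface (a sketch, not from the patch): a non-indexed draw whose vertex and instance counts come from an indirect-arguments buffer.

SVGA3dPrimitiveRange range;
enum pipe_error ret;

memset(&range, 0, sizeof(range));
range.primType = SVGA3D_PRIMITIVE_TRIANGLELIST;

ret = svga_hwtnl_prim(hwtnl, &range,
                      0, 0, 0,   /* vcount, min/max index unused here */
                      NULL,      /* no index buffer */
                      0, 1,      /* start_instance, instance_count */
                      indirect,  /* counts read from indirect->buffer */
                      NULL);     /* not a DrawAuto-style draw */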
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
index 9d79676d3f9..56d5127051d 100644
--- a/src/gallium/drivers/svga/svga_draw.h
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -60,7 +60,8 @@ svga_hwtnl_vertex_buffers(struct svga_hwtnl *hwtnl,
enum pipe_error
svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
enum pipe_prim_type prim, unsigned start, unsigned count,
- unsigned start_instance, unsigned instance_count);
+ unsigned start_instance, unsigned instance_count,
+ ubyte vertices_per_patch);
enum pipe_error
svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index 19d5e503137..af27e038bc8 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -175,13 +175,14 @@ done:
static enum pipe_error
simple_draw_arrays(struct svga_hwtnl *hwtnl,
enum pipe_prim_type prim, unsigned start, unsigned count,
- unsigned start_instance, unsigned instance_count)
+ unsigned start_instance, unsigned instance_count,
+ ubyte vertices_per_patch)
{
SVGA3dPrimitiveRange range;
unsigned hw_prim;
unsigned hw_count;
- hw_prim = svga_translate_prim(prim, count, &hw_count);
+ hw_prim = svga_translate_prim(prim, count, &hw_count, vertices_per_patch);
if (hw_count == 0)
return PIPE_ERROR_BAD_INPUT;
@@ -200,14 +201,16 @@ simple_draw_arrays(struct svga_hwtnl *hwtnl,
*/
return svga_hwtnl_prim(hwtnl, &range, count,
0, count - 1, NULL,
- start_instance, instance_count);
+ start_instance, instance_count,
+ NULL, NULL);
}
enum pipe_error
svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
enum pipe_prim_type prim, unsigned start, unsigned count,
- unsigned start_instance, unsigned instance_count)
+ unsigned start_instance, unsigned instance_count,
+ ubyte vertices_per_patch)
{
enum pipe_prim_type gen_prim;
unsigned gen_size, gen_nr;
@@ -225,7 +228,7 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
}
if (svga->curr.rast->templ.flatshade &&
- svga->state.hw_draw.fs->constant_color_output) {
+ svga_fs_variant(svga->state.hw_draw.fs)->constant_color_output) {
/* The fragment color is a constant, not per-vertex so the whole
* primitive will be the same color (except for possible blending).
* We can ignore the current provoking vertex state and use whatever
@@ -273,7 +276,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
if (gen_type == U_GENERATE_LINEAR) {
ret = simple_draw_arrays(hwtnl, gen_prim, start, count,
- start_instance, instance_count);
+ start_instance, instance_count,
+ vertices_per_patch);
}
else {
struct pipe_resource *gen_buf = NULL;
@@ -299,7 +303,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
count - 1,
gen_prim, 0, gen_nr,
start_instance,
- instance_count);
+ instance_count,
+ vertices_per_patch);
}
if (gen_buf) {
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index 41cd4d18993..b17fe44f747 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -186,14 +186,15 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
enum pipe_prim_type prim, unsigned start,
unsigned count,
unsigned start_instance,
- unsigned instance_count)
+ unsigned instance_count,
+ ubyte vertices_per_patch)
{
SVGA3dPrimitiveRange range;
unsigned hw_prim;
unsigned hw_count;
unsigned index_offset = start * index_size;
- hw_prim = svga_translate_prim(prim, count, &hw_count);
+ hw_prim = svga_translate_prim(prim, count, &hw_count, vertices_per_patch);
if (hw_count == 0)
return PIPE_OK; /* nothing to draw */
@@ -206,7 +207,8 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
return svga_hwtnl_prim(hwtnl, &range, count,
min_index, max_index, index_buffer,
- start_instance, instance_count);
+ start_instance, instance_count,
+ NULL, NULL);
}
@@ -234,12 +236,20 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
&gen_size, &gen_nr, &gen_func);
}
else {
+ unsigned hw_pv;
+
+ /* There is no geometry ordering with PATCH, so no need to
+ * consider provoking vertex mode for the translation.
+ * So use the same api_pv as the hw_pv.
+ */
+ hw_pv = info->mode == PIPE_PRIM_PATCHES ? hwtnl->api_pv :
+ hwtnl->hw_pv;
gen_type = u_index_translator(svga_hw_prims,
info->mode,
info->index_size,
count,
hwtnl->api_pv,
- hwtnl->hw_pv,
+ hw_pv,
PR_DISABLE,
&gen_prim, &gen_size, &gen_nr, &gen_func);
}
@@ -271,7 +281,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
info->max_index,
gen_prim, index_offset, count,
info->start_instance,
- info->instance_count);
+ info->instance_count,
+ info->vertices_per_patch);
pipe_resource_reference(&index_buffer, NULL);
}
else {
@@ -299,7 +310,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
gen_prim, gen_offset,
gen_nr,
info->start_instance,
- info->instance_count);
+ info->instance_count,
+ info->vertices_per_patch);
}
if (gen_buf) {
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index 52a2c0f18b3..475ccc5aae0 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -52,7 +52,8 @@ static const unsigned svga_hw_prims =
(1 << PIPE_PRIM_LINES_ADJACENCY) |
(1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
(1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
- (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY));
+ (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) |
+ (1 << PIPE_PRIM_PATCHES));
/**
@@ -64,7 +65,8 @@ static const unsigned svga_hw_prims =
* those to other types of primitives with index/translation code.
*/
static inline SVGA3dPrimitiveType
-svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
+svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count,
+ ubyte vertices_per_patch)
{
switch (mode) {
case PIPE_PRIM_POINTS:
@@ -107,6 +109,13 @@ svga_translate_prim(unsigned mode, unsigned vcount, unsigned *prim_count)
*prim_count = vcount / 2 - 2 ;
return SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ;
+ case PIPE_PRIM_PATCHES:
+ *prim_count = vcount / vertices_per_patch ;
+ assert(vertices_per_patch >= 1);
+ assert(vertices_per_patch <= 32);
+ return (SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH - 1)
+ + vertices_per_patch;
+
default:
assert(0);
*prim_count = 0;
@@ -218,7 +227,9 @@ svga_hwtnl_prim(struct svga_hwtnl *hwtnl,
unsigned min_index,
unsigned max_index,
struct pipe_resource *ib,
- unsigned start_instance, unsigned instance_count);
+ unsigned start_instance, unsigned instance_count,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_stream_output_target *so_vertex_count);
enum pipe_error
svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
@@ -231,6 +242,7 @@ svga_hwtnl_simple_draw_range_elements(struct svga_hwtnl *hwtnl,
unsigned start,
unsigned count,
unsigned start_instance,
- unsigned instance_count);
+ unsigned instance_count,
+ ubyte vertices_per_patch);
#endif
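
A small worked example of the PIPE_PRIM_PATCHES translation above (a sketch; it relies only on the control-point-patch primitive enums being consecutive, which the expression above already assumes):

/* 12 vertices drawn with 4 control points per patch:
 * prim_count = 12 / 4 = 3 patches, and the primitive type resolves to
 * (SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH - 1) + 4, i.e. the
 * 4-control-point patch topology. */
unsigned prim_count;
SVGA3dPrimitiveType prim =
   svga_translate_prim(PIPE_PRIM_PATCHES, 12, &prim_count, 4);
assert(prim_count == 3);
assert(prim == SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH + 3);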
diff --git a/src/gallium/drivers/svga/svga_format.c b/src/gallium/drivers/svga/svga_format.c
index 3f68f0cd67e..bb2f546d67d 100644
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -71,10 +71,10 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_Z32_FLOAT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D32_FLOAT, SVGA3D_D32_FLOAT, 0 },
[ PIPE_FORMAT_Z24_UNORM_S8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 },
[ PIPE_FORMAT_Z24X8_UNORM ] = { SVGA3D_FORMAT_INVALID, SVGA3D_D24_UNORM_S8_UINT, SVGA3D_D24_UNORM_S8_UINT, 0 },
- [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS },
- [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R32_FLOAT ] = { SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, SVGA3D_R32_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R32G32_FLOAT ] = { SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, SVGA3D_R32G32_FLOAT, TF_GEN_MIPS },
[ PIPE_FORMAT_R32G32B32_FLOAT ] = { SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, SVGA3D_R32G32B32_FLOAT, TF_GEN_MIPS },
- [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS },
+ [ PIPE_FORMAT_R32G32B32A32_FLOAT ] = { SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, SVGA3D_R32G32B32A32_FLOAT, TF_GEN_MIPS },
[ PIPE_FORMAT_R32_USCALED ] = { SVGA3D_R32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R32G32_USCALED ] = { SVGA3D_R32G32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
[ PIPE_FORMAT_R32G32B32_USCALED ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, VF_U_TO_F_CAST },
@@ -176,11 +176,11 @@ static const struct vgpu10_format_entry format_conversion_table[] =
[ PIPE_FORMAT_R16G16B16A16_SINT ] = { SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, SVGA3D_R16G16B16A16_SINT, 0 },
[ PIPE_FORMAT_R32_UINT ] = { SVGA3D_R32_UINT, SVGA3D_R32_UINT, SVGA3D_R32_UINT, 0 },
[ PIPE_FORMAT_R32G32_UINT ] = { SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, SVGA3D_R32G32_UINT, 0 },
- [ PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ [ PIPE_FORMAT_R32G32B32_UINT ] = { SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, SVGA3D_R32G32B32_UINT, 0 },
[ PIPE_FORMAT_R32G32B32A32_UINT ] = { SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, SVGA3D_R32G32B32A32_UINT, 0 },
[ PIPE_FORMAT_R32_SINT ] = { SVGA3D_R32_SINT, SVGA3D_R32_SINT, SVGA3D_R32_SINT, 0 },
[ PIPE_FORMAT_R32G32_SINT ] = { SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, SVGA3D_R32G32_SINT, 0 },
- [ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
+ [ PIPE_FORMAT_R32G32B32_SINT ] = { SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, SVGA3D_R32G32B32_SINT, 0 },
[ PIPE_FORMAT_R32G32B32A32_SINT ] = { SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, SVGA3D_R32G32B32A32_SINT, 0 },
[ PIPE_FORMAT_A8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_000X },
[ PIPE_FORMAT_I8_UINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R8_UINT, TF_XXXX },
@@ -2137,7 +2137,7 @@ svga_is_format_supported(struct pipe_screen *screen,
}
if (util_format_is_srgb(format) &&
- (bindings & PIPE_BIND_DISPLAY_TARGET)) {
+ (bindings & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_RENDER_TARGET))) {
/* We only support sRGB rendering with vgpu10 */
return false;
}
@@ -2252,6 +2252,12 @@ svga_is_dx_format_supported(struct pipe_screen *screen,
return svga_format != SVGA3D_FORMAT_INVALID;
}
+ if (bindings & PIPE_BIND_SAMPLER_VIEW && target == PIPE_BUFFER) {
+ unsigned flags;
+ svga_translate_texture_buffer_view_format(format, &svga_format, &flags);
+ return svga_format != SVGA3D_FORMAT_INVALID;
+ }
+
svga_format = svga_translate_format(ss, format, bindings);
if (svga_format == SVGA3D_FORMAT_INVALID) {
return false;
diff --git a/src/gallium/drivers/svga/svga_link.c b/src/gallium/drivers/svga/svga_link.c
index 0bf40d153b7..c9861a7e481 100644
--- a/src/gallium/drivers/svga/svga_link.c
+++ b/src/gallium/drivers/svga/svga_link.c
@@ -87,6 +87,15 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info,
}
}
+ /* Find the index for position */
+ linkage->position_index = 0;
+ for (i = 0; i < outshader_info->num_outputs; i++) {
+ if (outshader_info->output_semantic_name[i] == TGSI_SEMANTIC_POSITION) {
+ linkage->position_index = i;
+ break;
+ }
+ }
+
linkage->num_inputs = inshader_info->num_inputs;
/* Things like the front-face register are handled here */
@@ -100,7 +109,8 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info,
/* Debug */
if (SVGA_DEBUG & DEBUG_TGSI) {
- unsigned reg = 0;
+ uint64_t reg = 0;
+ uint64_t one = 1;
debug_printf("### linkage info: num_inputs=%d input_map_max=%d\n",
linkage->num_inputs, linkage->input_map_max);
@@ -116,10 +126,8 @@ svga_link_shaders(const struct tgsi_shader_info *outshader_info,
tgsi_interpolate_names[inshader_info->input_interpolate[i]]);
/* make sure no repeating register index */
- if (reg & 1 << linkage->input_map[i]) {
- assert(0);
- }
- reg |= 1 << linkage->input_map[i];
+ assert((reg & (one << linkage->input_map[i])) == 0);
+ reg |= one << linkage->input_map[i];
}
}
}
diff --git a/src/gallium/drivers/svga/svga_link.h b/src/gallium/drivers/svga/svga_link.h
index c21686eef59..8d3517ea28a 100644
--- a/src/gallium/drivers/svga/svga_link.h
+++ b/src/gallium/drivers/svga/svga_link.h
@@ -9,6 +9,7 @@ struct svga_context;
struct shader_linkage
{
unsigned num_inputs;
+ unsigned position_index; /* position register index */
unsigned input_map_max; /* highest index of mapped inputs */
ubyte input_map[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
index b5557d31f44..e24a6beb0e4 100644
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -92,6 +92,51 @@ svga_translate_blend_func(unsigned mode)
/**
+ * Translate gallium logicop mode to SVGA3D logicop mode.
+ */
+static int
+translate_logicop(enum pipe_logicop op)
+{
+ switch (op) {
+ case PIPE_LOGICOP_CLEAR:
+ return SVGA3D_DX11_LOGICOP_CLEAR;
+ case PIPE_LOGICOP_NOR:
+ return SVGA3D_DX11_LOGICOP_NOR;
+ case PIPE_LOGICOP_AND_INVERTED:
+ return SVGA3D_DX11_LOGICOP_AND_INVERTED;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return SVGA3D_DX11_LOGICOP_COPY_INVERTED;
+ case PIPE_LOGICOP_AND_REVERSE:
+ return SVGA3D_DX11_LOGICOP_AND_REVERSE;
+ case PIPE_LOGICOP_INVERT:
+ return SVGA3D_DX11_LOGICOP_INVERT;
+ case PIPE_LOGICOP_XOR:
+ return SVGA3D_DX11_LOGICOP_XOR;
+ case PIPE_LOGICOP_NAND:
+ return SVGA3D_DX11_LOGICOP_NAND;
+ case PIPE_LOGICOP_AND:
+ return SVGA3D_DX11_LOGICOP_AND;
+ case PIPE_LOGICOP_EQUIV:
+ return SVGA3D_DX11_LOGICOP_EQUIV;
+ case PIPE_LOGICOP_NOOP:
+ return SVGA3D_DX11_LOGICOP_NOOP;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return SVGA3D_DX11_LOGICOP_OR_INVERTED;
+ case PIPE_LOGICOP_COPY:
+ return SVGA3D_DX11_LOGICOP_COPY;
+ case PIPE_LOGICOP_OR_REVERSE:
+ return SVGA3D_DX11_LOGICOP_OR_REVERSE;
+ case PIPE_LOGICOP_OR:
+ return SVGA3D_DX11_LOGICOP_OR;
+ case PIPE_LOGICOP_SET:
+ return SVGA3D_DX11_LOGICOP_SET;
+ default:
+ return SVGA3D_DX11_LOGICOP_COPY;
+ }
+};
+
+
+/**
* Define a vgpu10 blend state object for the given
* svga blend state.
*/
@@ -100,7 +145,6 @@ define_blend_state_object(struct svga_context *svga,
struct svga_blend_state *bs)
{
SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS];
- unsigned try;
int i;
assert(svga_have_vgpu10(svga));
@@ -116,31 +160,141 @@ define_blend_state_object(struct svga_context *svga,
perRT[i].destBlendAlpha = bs->rt[i].dstblend_alpha;
perRT[i].blendOpAlpha = bs->rt[i].blendeq_alpha;
perRT[i].renderTargetWriteMask = bs->rt[i].writemask;
- perRT[i].logicOpEnable = 0;
- perRT[i].logicOp = SVGA3D_LOGICOP_COPY;
+ perRT[i].logicOpEnable = bs->logicop_enabled;
+ perRT[i].logicOp = bs->logicop_mode;
}
- /* Loop in case command buffer is full and we need to flush and retry */
- for (try = 0; try < 2; try++) {
- enum pipe_error ret;
-
- ret = SVGA3D_vgpu10_DefineBlendState(svga->swc,
- bs->id,
- bs->alpha_to_coverage,
- bs->independent_blend_enable,
- perRT);
- if (ret == PIPE_OK)
- return;
- svga_context_flush(svga, NULL);
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DefineBlendState(svga->swc,
+ bs->id,
+ bs->alpha_to_coverage,
+ bs->independent_blend_enable,
+ perRT));
+}
+
+
+/**
+ * If SVGA3D_DEVCAP_LOGIC_BLENDOPS is false, we can't directly implement
+ * GL's logicops. But we can emulate some of them. We set up the blending
+ * state for that here.
+ */
+static void
+emulate_logicop(struct svga_context *svga,
+ unsigned logicop_func,
+ struct svga_blend_state *blend,
+ unsigned buffer)
+{
+ switch (logicop_func) {
+ case PIPE_LOGICOP_XOR:
+ case PIPE_LOGICOP_INVERT:
+ blend->need_white_fragments = TRUE;
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_SUBTRACT;
+ break;
+ case PIPE_LOGICOP_CLEAR:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_COPY:
+ blend->rt[buffer].blend_enable = FALSE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_NOOP:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ case PIPE_LOGICOP_SET:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_AND:
+ /* Approximate with minimum - works for the 0 & anything case: */
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MINIMUM;
+ break;
+ case PIPE_LOGICOP_OR:
+ /* Approximate with maximum - works for the 1 | anything case: */
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ blend->rt[buffer].blend_enable = TRUE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
+ break;
+ case PIPE_LOGICOP_NAND:
+ case PIPE_LOGICOP_NOR:
+ case PIPE_LOGICOP_EQUIV:
+ /* Fill these in with plausible values */
+ blend->rt[buffer].blend_enable = FALSE;
+ blend->rt[buffer].srcblend = SVGA3D_BLENDOP_ONE;
+ blend->rt[buffer].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[buffer].blendeq = SVGA3D_BLENDEQ_ADD;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ blend->rt[buffer].srcblend_alpha = blend->rt[buffer].srcblend;
+ blend->rt[buffer].dstblend_alpha = blend->rt[buffer].dstblend;
+ blend->rt[buffer].blendeq_alpha = blend->rt[buffer].blendeq;
+
+ if (logicop_func == PIPE_LOGICOP_XOR) {
+ pipe_debug_message(&svga->debug.callback, CONFORMANCE,
+ "XOR logicop mode has limited support");
+ }
+ else if (logicop_func != PIPE_LOGICOP_COPY) {
+ pipe_debug_message(&svga->debug.callback, CONFORMANCE,
+ "general logicops are not supported");
}
}
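For PIPE_LOGICOP_INVERT (and the XOR approximation), the setup above forces white fragments and a ONE/ONE SUBTRACT blend, so the framebuffer ends up holding 1 - dst. The standalone sketch below works through that arithmetic on normalized colors; it is an illustration of the emulation, not driver code.

#include <stdio.h>

/* Emulated INVERT via blending, matching the PIPE_LOGICOP_INVERT setup above:
 * the shader is forced to output white (1.0), the blend factors are ONE/ONE
 * and the equation is SUBTRACT, so dst' = 1*white - 1*dst = 1 - dst.
 * Colors are normalized floats here; illustration only.
 */
static float blend_subtract(float src, float src_factor,
                            float dst, float dst_factor)
{
   float r = src * src_factor - dst * dst_factor;
   if (r < 0.0f) r = 0.0f;          /* hardware clamps the blend result */
   if (r > 1.0f) r = 1.0f;
   return r;
}

int main(void)
{
   float dst = 0.25f;               /* existing color in the render target */
   float white = 1.0f;              /* forced fragment color */
   float out = blend_subtract(white, 1.0f, dst, 1.0f);
   printf("dst=%.2f -> %.2f (expected %.2f)\n", dst, out, 1.0f - dst);
   return 0;
}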
+
static void *
svga_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *templ)
{
struct svga_context *svga = svga_context(pipe);
+ struct svga_screen *ss = svga_screen(pipe->screen);
struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
unsigned i;
@@ -166,107 +320,18 @@ svga_create_blend_state(struct pipe_context *pipe,
* top of D3D9 API. Instead we try to simulate with various blend modes.
*/
if (templ->logicop_enable) {
- switch (templ->logicop_func) {
- case PIPE_LOGICOP_XOR:
- case PIPE_LOGICOP_INVERT:
- blend->need_white_fragments = TRUE;
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_SUBTRACT;
- break;
- case PIPE_LOGICOP_CLEAR:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
- break;
- case PIPE_LOGICOP_COPY:
- blend->rt[i].blend_enable = FALSE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
- break;
- case PIPE_LOGICOP_COPY_INVERTED:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
- break;
- case PIPE_LOGICOP_NOOP:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
- break;
- case PIPE_LOGICOP_SET:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
- break;
- case PIPE_LOGICOP_AND:
- /* Approximate with minimum - works for the 0 & anything case: */
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
- break;
- case PIPE_LOGICOP_AND_REVERSE:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
- break;
- case PIPE_LOGICOP_AND_INVERTED:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MINIMUM;
- break;
- case PIPE_LOGICOP_OR:
- /* Approximate with maximum - works for the 1 | anything case: */
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
- break;
- case PIPE_LOGICOP_OR_REVERSE:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_SRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_INVDESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
- break;
- case PIPE_LOGICOP_OR_INVERTED:
- blend->rt[i].blend_enable = TRUE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_INVSRCCOLOR;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_DESTCOLOR;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_MAXIMUM;
- break;
- case PIPE_LOGICOP_NAND:
- case PIPE_LOGICOP_NOR:
- case PIPE_LOGICOP_EQUIV:
- /* Fill these in with plausible values */
- blend->rt[i].blend_enable = FALSE;
- blend->rt[i].srcblend = SVGA3D_BLENDOP_ONE;
- blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
- blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
- break;
- default:
- assert(0);
- break;
- }
- blend->rt[i].srcblend_alpha = blend->rt[i].srcblend;
- blend->rt[i].dstblend_alpha = blend->rt[i].dstblend;
- blend->rt[i].blendeq_alpha = blend->rt[i].blendeq;
-
- if (templ->logicop_func == PIPE_LOGICOP_XOR) {
- pipe_debug_message(&svga->debug.callback, CONFORMANCE,
- "XOR logicop mode has limited support");
+ if (ss->haveBlendLogicops) {
+ blend->logicop_enabled = TRUE;
+ blend->logicop_mode = translate_logicop(templ->logicop_func);
+ blend->rt[i].blendeq = SVGA3D_BLENDEQ_ADD;
+ blend->rt[i].blendeq_alpha = SVGA3D_BLENDEQ_ADD;
+ blend->rt[i].srcblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].dstblend = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].srcblend_alpha = SVGA3D_BLENDOP_ZERO;
+ blend->rt[i].dstblend_alpha = SVGA3D_BLENDOP_ZERO;
}
- else if (templ->logicop_func != PIPE_LOGICOP_COPY) {
- pipe_debug_message(&svga->debug.callback, CONFORMANCE,
- "general logicops are not supported");
+ else {
+ emulate_logicop(svga, templ->logicop_func, blend, i);
}
}
else {
@@ -374,14 +439,7 @@ static void svga_delete_blend_state(struct pipe_context *pipe,
(struct svga_blend_state *) blend;
if (svga_have_vgpu10(svga) && bs->id != SVGA3D_INVALID_ID) {
- enum pipe_error ret;
-
- ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyBlendState(svga->swc, bs->id));
if (bs->id == svga->state.hw_draw.blend_id)
svga->state.hw_draw.blend_id = SVGA3D_INVALID_ID;
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index 31806ceb1e1..a756509ce76 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -80,7 +80,6 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
unsigned width, unsigned height, unsigned depth)
{
- enum pipe_error ret;
SVGA3dCopyBox box;
struct svga_texture *stex;
@@ -102,15 +101,8 @@ intra_surface_copy(struct svga_context *svga, struct pipe_resource *tex,
box.srcy = src_y;
box.srcz = src_z;
- ret = SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc,
- stex->handle, level, layer_face, &box);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc,
- stex->handle, level, layer_face, &box);
- assert(ret == PIPE_OK);
- }
-
+ SVGA_RETRY(svga, SVGA3D_vgpu10_IntraSurfaceCopy(svga->swc, stex->handle,
+ level, layer_face, &box));
/* Mark the texture subresource as rendered-to. */
svga_set_texture_rendered_to(stex, layer_face, level);
}
@@ -630,11 +622,13 @@ try_blit(struct svga_context *svga, const struct pipe_blit_info *blit_info)
util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
+ util_blitter_save_tessctrl_shader(svga->blitter, svga->curr.tcs);
+ util_blitter_save_tesseval_shader(svga->blitter, svga->curr.tes);
util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
(struct pipe_stream_output_target**)svga->so_targets);
util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
- util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
- util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+ util_blitter_save_viewport(svga->blitter, &svga->curr.viewport[0]);
+ util_blitter_save_scissor(svga->blitter, &svga->curr.scissor[0]);
util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
util_blitter_save_depth_stencil_alpha(svga->blitter,
@@ -835,7 +829,6 @@ svga_resource_copy_region(struct pipe_context *pipe,
if (dst_tex->target == PIPE_BUFFER && src_tex->target == PIPE_BUFFER) {
/* can't copy within the same buffer, unfortunately */
if (svga_have_vgpu10(svga) && src_tex != dst_tex) {
- enum pipe_error ret;
struct svga_winsys_surface *src_surf;
struct svga_winsys_surface *dst_surf;
struct svga_buffer *dbuffer = svga_buffer(dst_tex);
@@ -844,15 +837,9 @@ svga_resource_copy_region(struct pipe_context *pipe,
src_surf = svga_buffer_handle(svga, src_tex, sbuffer->bind_flags);
dst_surf = svga_buffer_handle(svga, dst_tex, dbuffer->bind_flags);
- ret = SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf, dst_surf,
- src_box->x, dstx, src_box->width);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf, dst_surf,
- src_box->x, dstx, src_box->width);
- assert(ret == PIPE_OK);
- }
-
+ SVGA_RETRY(svga, SVGA3D_vgpu10_BufferCopy(svga->swc, src_surf,
+ dst_surf, src_box->x, dstx,
+ src_box->width));
dbuffer->dirty = TRUE;
}
else {
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
index 89a9b533f91..490f91b5fc9 100644
--- a/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -45,11 +45,13 @@ begin_blit(struct svga_context *svga)
util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
util_blitter_save_geometry_shader(svga->blitter, svga->curr.gs);
+ util_blitter_save_tessctrl_shader(svga->blitter, svga->curr.tcs);
+ util_blitter_save_tesseval_shader(svga->blitter, svga->curr.tes);
util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
(struct pipe_stream_output_target**)svga->so_targets);
util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
- util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
- util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
+ util_blitter_save_viewport(svga->blitter, &svga->curr.viewport[0]);
+ util_blitter_save_scissor(svga->blitter, &svga->curr.scissor[0]);
util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
util_blitter_save_depth_stencil_alpha(svga->blitter,
@@ -248,15 +250,7 @@ svga_clear(struct pipe_context *pipe, unsigned buffers, const struct pipe_scisso
/* flush any queued prims (don't want them to appear after the clear!) */
svga_hwtnl_flush_retry(svga);
- ret = try_clear( svga, buffers, color, depth, stencil );
-
- if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
- /* Flush command buffer and retry:
- */
- svga_context_flush( svga, NULL );
-
- ret = try_clear( svga, buffers, color, depth, stencil );
- }
+ SVGA_RETRY_OOM(svga, ret, try_clear( svga, buffers, color, depth, stencil));
/*
* Mark target surfaces as dirty
@@ -277,7 +271,6 @@ svga_clear_texture(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
struct svga_surface *svga_surface_dst;
- enum pipe_error ret;
struct pipe_surface tmpl;
struct pipe_surface *surface;
@@ -309,8 +302,8 @@ svga_clear_texture(struct pipe_context *pipe,
stencil = 0;
}
else {
- util_format_unpack_z_float(surface->format, &depth, data, 1);
- util_format_unpack_s_8uint(surface->format, &stencil, data, 1);
+ desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+ desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
}
if (util_format_has_depth(desc)) {
@@ -334,17 +327,9 @@ svga_clear_texture(struct pipe_context *pipe,
/* clearing whole surface, use direct VGPU10 command */
- ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
- clear_flags,
- stencil, depth);
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
- clear_flags,
- stencil, depth);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
+ clear_flags,
+ stencil, depth));
}
else {
/* To clear subtexture use software fallback */
@@ -367,7 +352,18 @@ svga_clear_texture(struct pipe_context *pipe,
color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0;
}
else {
- util_format_unpack_rgba(surface->format, color.ui, data, 1);
+ if (util_format_is_pure_sint(surface->format)) {
+ /* signed integer */
+ desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
+ }
+ else if (util_format_is_pure_uint(surface->format)) {
+ /* unsigned integer */
+ desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
+ }
+ else {
+ /* floating point */
+ desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
+ }
}
/* Setup render target view */
@@ -390,14 +386,8 @@ svga_clear_texture(struct pipe_context *pipe,
}
else {
/* clearing whole surface using VGPU10 command */
- ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
- color.f);
- if (ret != PIPE_OK) {
- svga_context_flush(svga,NULL);
- ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
- color.f);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
+ color.f));
}
}
else {
@@ -526,13 +516,9 @@ svga_clear_render_target(struct pipe_context *pipe,
height);
} else {
enum pipe_error ret;
-
- ret = svga_try_clear_render_target(svga, dst, color);
- if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
- svga_context_flush( svga, NULL );
- ret = svga_try_clear_render_target(svga, dst, color);
- }
-
+
+ SVGA_RETRY_OOM(svga, ret, svga_try_clear_render_target(svga, dst,
+ color));
assert (ret == PIPE_OK);
}
svga_toggle_render_condition(svga, render_condition_enabled, TRUE);
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 5ebd17cf0ea..e6fabfc995e 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -24,12 +24,16 @@
**********************************************************/
+#include "util/u_draw.h"
+#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"
#include "svga_context.h"
+#include "svga_draw_private.h"
+#include "svga_screen.h"
#include "svga_draw.h"
#include "svga_shader.h"
#include "svga_surface.h"
@@ -37,60 +41,138 @@
#include "svga_debug.h"
#include "svga_resource_buffer.h"
-/* Returns TRUE if we are currently using flat shading.
- */
-static boolean
-is_using_flat_shading(const struct svga_context *svga)
-{
- return
- svga->state.hw_draw.fs ? svga->state.hw_draw.fs->uses_flat_interp : FALSE;
-}
-
static enum pipe_error
retry_draw_range_elements(struct svga_context *svga,
const struct pipe_draw_info *info,
unsigned count)
{
- enum pipe_error ret;
-
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWELEMENTS);
- ret = svga_hwtnl_draw_range_elements(svga->hwtnl, info, count);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = svga_hwtnl_draw_range_elements(svga->hwtnl, info, count);
- }
+ SVGA_RETRY(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, info, count));
- assert (ret == PIPE_OK);
SVGA_STATS_TIME_POP(svga_sws(svga));
- return ret;
+ return PIPE_OK;
}
static enum pipe_error
-retry_draw_arrays(struct svga_context *svga,
- enum pipe_prim_type prim, unsigned start, unsigned count,
- unsigned start_instance, unsigned instance_count)
+retry_draw_arrays( struct svga_context *svga,
+ enum pipe_prim_type prim, unsigned start, unsigned count,
+ unsigned start_instance, unsigned instance_count,
+ ubyte vertices_per_patch)
{
enum pipe_error ret;
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DRAWARRAYS);
- for (unsigned try = 0; try < 2; try++) {
- ret = svga_hwtnl_draw_arrays(svga->hwtnl, prim, start, count,
- start_instance, instance_count);
- if (ret == PIPE_OK)
- break;
- svga_context_flush(svga, NULL);
- }
-
+ SVGA_RETRY_OOM(svga, ret, svga_hwtnl_draw_arrays(svga->hwtnl, prim, start,
+ count, start_instance,
+ instance_count,
+ vertices_per_patch));
SVGA_STATS_TIME_POP(svga_sws(svga));
return ret;
}
/**
+ * Auto draw (get vertex count from a transform feedback result).
+ */
+static enum pipe_error
+retry_draw_auto(struct svga_context *svga,
+ const struct pipe_draw_info *info)
+{
+ assert(svga_have_sm5(svga));
+ assert(info->count_from_stream_output);
+ assert(info->instance_count == 1);
+ /* SO drawing implies core profile and none of these prim types */
+ assert(info->mode != PIPE_PRIM_QUADS &&
+ info->mode != PIPE_PRIM_QUAD_STRIP &&
+ info->mode != PIPE_PRIM_POLYGON);
+
+ if (info->mode == PIPE_PRIM_LINE_LOOP) {
+ /* XXX need to do a fallback */
+ assert(!"draw auto fallback not supported yet");
+ return PIPE_OK;
+ }
+ else {
+ SVGA3dPrimitiveRange range;
+ unsigned hw_count;
+
+ range.primType = svga_translate_prim(info->mode, 12, &hw_count,
+ info->vertices_per_patch);
+ range.primitiveCount = 0;
+ range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+ range.indexArray.offset = 0;
+ range.indexArray.stride = 0;
+ range.indexWidth = 0;
+ range.indexBias = 0;
+
+ SVGA_RETRY(svga, svga_hwtnl_prim
+ (svga->hwtnl, &range,
+ 0, /* vertex count comes from SO buffer */
+ 0, /* don't know min index */
+ ~0u, /* don't know max index */
+ NULL, /* no index buffer */
+ 0, /* start instance */
+ 1, /* only 1 instance supported */
+ NULL, /* indirect drawing info */
+ info->count_from_stream_output));
+
+ return PIPE_OK;
+ }
+}
+
+
+/**
+ * Indirect draw (get vertex count, start index, etc. from a buffer object).
+ */
+static enum pipe_error
+retry_draw_indirect(struct svga_context *svga,
+ const struct pipe_draw_info *info)
+{
+ assert(svga_have_sm5(svga));
+ assert(info->indirect);
+ /* indirect drawing implies core profile and none of these prim types */
+ assert(info->mode != PIPE_PRIM_QUADS &&
+ info->mode != PIPE_PRIM_QUAD_STRIP &&
+ info->mode != PIPE_PRIM_POLYGON);
+
+ if (info->mode == PIPE_PRIM_LINE_LOOP) {
+ /* need to do a fallback */
+ util_draw_indirect(&svga->pipe, info);
+ return PIPE_OK;
+ }
+ else {
+ SVGA3dPrimitiveRange range;
+ unsigned hw_count;
+
+ range.primType = svga_translate_prim(info->mode, 12, &hw_count,
+ info->vertices_per_patch);
+ range.primitiveCount = 0; /* specified in indirect buffer */
+ range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+ range.indexArray.offset = 0;
+ range.indexArray.stride = 0;
+ range.indexWidth = info->index_size;
+ range.indexBias = 0; /* specified in indirect buffer */
+
+ SVGA_RETRY(svga, svga_hwtnl_prim
+ (svga->hwtnl, &range,
+ 0, /* vertex count is in indirect buffer */
+ 0, /* don't know min index */
+ ~0u, /* don't know max index */
+ info->index.resource,
+ info->start_instance,
+ 0, /* don't know instance count */
+ info->indirect,
+ NULL)); /* SO vertex count */
+
+ return PIPE_OK;
+ }
+}
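The LINE_LOOP case falls back to util_draw_indirect, which reads the draw parameters out of the application-supplied buffer on the CPU. The record below is modeled on the GL DrawElementsIndirectCommand layout; treat the exact field list as an assumption for illustration, not an SVGA device format.

#include <stdint.h>
#include <stdio.h>

/* Indirect-draw argument record, modeled on the GL DrawElementsIndirectCommand
 * layout. Assumed layout for illustration only.
 */
struct indirect_elements_args {
   uint32_t count;            /* indices per instance */
   uint32_t instance_count;   /* number of instances */
   uint32_t first_index;      /* starting index within the index buffer */
   int32_t  base_vertex;      /* value added to each fetched index */
   uint32_t base_instance;    /* first instance id */
};

int main(void)
{
   struct indirect_elements_args args = { 36, 4, 0, 0, 0 };
   printf("draw %u indices, %u instance(s)\n", args.count, args.instance_count);
   return 0;
}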
+
+
+/**
* Determine if we need to implement primitive restart with a fallback
* path which breaks the original primitive into sub-primitive at the
* restart indexes.
@@ -116,6 +198,21 @@ need_fallback_prim_restart(const struct svga_context *svga,
}
+/**
+ * A helper function to compute the vertex count from the primitive count
+ * reported by the stream output statistics query for the specified stream.
+ */
+static unsigned
+get_vcount_from_stream_output(struct svga_context *svga,
+ const struct pipe_draw_info *info,
+ unsigned stream)
+{
+ unsigned primcount;
+ primcount = svga_get_primcount_from_stream_output(svga, stream);
+ return u_vertices_for_prims(info->mode, primcount);
+}
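u_vertices_for_prims() converts the primitive count reported by the statistics query back into a vertex count for the draw. A sketch of the conversion for a few common topologies (assumed formulas; the real helper lives in util/u_prim.h):

#include <stdio.h>

/* Primitive-count -> vertex-count conversion for a few common topologies. */
enum prim { PRIM_POINTS, PRIM_LINES, PRIM_LINE_STRIP,
            PRIM_TRIANGLES, PRIM_TRIANGLE_STRIP };

static unsigned vertices_for_prims(enum prim mode, unsigned prims)
{
   if (prims == 0)
      return 0;
   switch (mode) {
   case PRIM_POINTS:         return prims;
   case PRIM_LINES:          return prims * 2;
   case PRIM_LINE_STRIP:     return prims + 1;
   case PRIM_TRIANGLES:      return prims * 3;
   case PRIM_TRIANGLE_STRIP: return prims + 2;
   }
   return 0;
}

int main(void)
{
   /* e.g. 10 triangles written by the previous streamout pass */
   printf("triangles: %u verts\n", vertices_for_prims(PRIM_TRIANGLES, 10));
   printf("strip:     %u verts\n", vertices_for_prims(PRIM_TRIANGLE_STRIP, 10));
   return 0;
}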
+
+
static void
svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
@@ -147,6 +244,18 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
}
+ if (svga->curr.vertices_per_patch != info->vertices_per_patch) {
+ svga->curr.vertices_per_patch = info->vertices_per_patch;
+
+ /* If input patch size changes, we need to notify the TCS
+ * code to reevaluate the shader variant since the
+ * vertices per patch count is a constant in the control
+ * point count declaration.
+ */
+ if (svga->curr.tcs || svga->curr.tes)
+ svga->dirty |= SVGA_NEW_TCS_PARAM;
+ }
+
if (need_fallback_prim_restart(svga, info)) {
enum pipe_error r;
r = util_draw_vbo_without_prim_restart(pipe, info);
@@ -155,7 +264,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
goto done;
}
- if (!u_trim_pipe_prim(info->mode, &count))
+ if (!info->indirect && !info->count_from_stream_output &&
+ !u_trim_pipe_prim(info->mode, &count))
goto done;
needed_swtnl = svga->state.sw.need_swtnl;
@@ -189,20 +299,53 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
+ svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
+
/** determine if flatshade is to be used after svga_update_state()
* in case the fragment shader is changed.
*/
svga_hwtnl_set_flatshade(svga->hwtnl,
svga->curr.rast->templ.flatshade ||
- is_using_flat_shading(svga),
+ svga_is_using_flat_shading(svga),
svga->curr.rast->templ.flatshade_first);
- if (info->index_size) {
+ if (info->count_from_stream_output) {
+ unsigned stream = 0;
+ assert(count == 0);
+
+ /* If the vertex count is from the stream output of a non-zero stream
+ * or the draw info specifies instancing, we will need a workaround
+ * since the draw_auto command does not support stream instancing.
+ * The workaround requires querying the vertex count from the
+ * stream output statistics query for the specified stream and then
+ * fallback to the regular draw function.
+ */
+
+ /* Check the stream index of the specified stream output target */
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
+ if (svga->vcount_so_targets[i] == info->count_from_stream_output) {
+ stream = (svga->vcount_buffer_stream >> (i * 4)) & 0xf;
+ break;
+ }
+ }
+ if (info->instance_count > 1 || stream > 0) {
+ count = get_vcount_from_stream_output(svga, info, stream);
+ }
+ }
+
+ if (info->count_from_stream_output && count == 0) {
+ ret = retry_draw_auto(svga, info);
+ }
+ else if (info->indirect) {
+ ret = retry_draw_indirect(svga, info);
+ }
+ else if (info->index_size) {
ret = retry_draw_range_elements(svga, info, count);
}
else {
ret = retry_draw_arrays(svga, info->mode, info->start, count,
- info->start_instance, info->instance_count);
+ info->start_instance, info->instance_count,
+ info->vertices_per_patch);
}
}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index a2f00b1d290..7795afbfe1f 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -37,7 +37,7 @@
#include "svga_shader.h"
-static void *
+void *
svga_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
@@ -69,7 +69,7 @@ svga_create_fs_state(struct pipe_context *pipe,
}
-static void
+void
svga_bind_fs_state(struct pipe_context *pipe, void *shader)
{
struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
@@ -85,6 +85,7 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)
{
struct svga_context *svga = svga_context(pipe);
struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+ struct svga_fragment_shader *next_fs;
struct svga_shader_variant *variant, *tmp;
enum pipe_error ret;
@@ -92,27 +93,32 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)
assert(fs->base.parent == NULL);
- draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);
+ while (fs) {
+ next_fs = (struct svga_fragment_shader *) fs->base.next;
+
+ draw_delete_fragment_shader(svga->swtnl.draw, fs->draw_shader);
- for (variant = fs->base.variants; variant; variant = tmp) {
- tmp = variant->next;
+ for (variant = fs->base.variants; variant; variant = tmp) {
+ tmp = variant->next;
- /* Check if deleting currently bound shader */
- if (variant == svga->state.hw_draw.fs) {
- ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.fs) {
ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
- assert(ret == PIPE_OK);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, NULL);
+ assert(ret == PIPE_OK);
+ }
+ svga->state.hw_draw.fs = NULL;
}
- svga->state.hw_draw.fs = NULL;
+
+ svga_destroy_shader_variant(svga, variant);
}
- svga_destroy_shader_variant(svga, variant);
+ FREE((void *)fs->base.tokens);
+ FREE(fs);
+ fs = next_fs;
}
-
- FREE((void *)fs->base.tokens);
- FREE(fs);
}
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index e0d1e51f412..61b4897c5d6 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -40,9 +40,16 @@ svga_set_scissor_states(struct pipe_context *pipe,
unsigned num_scissors,
const struct pipe_scissor_state *scissors)
{
+ ASSERTED struct svga_screen *svgascreen = svga_screen(pipe->screen);
struct svga_context *svga = svga_context(pipe);
+ unsigned i, num_sc;
+
+ assert(start_slot + num_scissors <= svgascreen->max_viewports);
+
+ for (i = 0, num_sc = start_slot; i < num_scissors; i++) {
+ svga->curr.scissor[num_sc++] = scissors[i]; /* struct copy */
+ }
- memcpy(&svga->curr.scissor, scissors, sizeof(*scissors));
svga->dirty |= SVGA_NEW_SCISSOR;
}
@@ -199,8 +206,14 @@ svga_set_viewport_states(struct pipe_context *pipe,
const struct pipe_viewport_state *viewports)
{
struct svga_context *svga = svga_context(pipe);
+ ASSERTED struct svga_screen *svgascreen = svga_screen(pipe->screen);
+ unsigned i, num_vp;
- svga->curr.viewport = *viewports; /* struct copy */
+ assert(start_slot + num_viewports <= svgascreen->max_viewports);
+
+ for (i = 0, num_vp = start_slot; i < num_viewports; i++) {
+ svga->curr.viewport[num_vp++] = viewports[i]; /* struct copy */
+ }
svga->dirty |= SVGA_NEW_VIEWPORT;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 1b9b17e2a8e..38874deb414 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -50,6 +50,7 @@ struct svga_query {
SVGA3dQueryType svga_type; /**< SVGA3D_QUERYTYPE_x or unused */
unsigned id; /** Per-context query identifier */
+ boolean active; /** TRUE if query is active */
struct pipe_fence_handle *fence;
@@ -214,10 +215,10 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
* will hold queries of the same type. Multiple memory blocks can be allocated
* for a particular query type.
*
- * Currently each memory block is of 184 bytes. We support up to 128
+ * Currently each memory block is of 184 bytes. We support up to 512
* memory blocks. The query memory size is arbitrary right now.
* Each occlusion query takes about 8 bytes. One memory block can accommodate
- * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
+ * 23 occlusion queries. 512 of those blocks can support up to 11K occlusion
* queries. That seems reasonable for now. If we think this limit is
* not enough, we can increase the limit or try to grow the mob in runtime.
* Note, SVGA device does not impose one mob per context for queries,
@@ -228,7 +229,7 @@ get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
* following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
*/
#define SVGA_QUERY_MEM_BLOCK_SIZE (sizeof(SVGADXQueryResultUnion) * 2)
-#define SVGA_QUERY_MEM_SIZE (128 * SVGA_QUERY_MEM_BLOCK_SIZE)
+#define SVGA_QUERY_MEM_SIZE (512 * SVGA_QUERY_MEM_BLOCK_SIZE)
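The sizing in the comment above can be checked with a few lines of arithmetic; the 8-byte occlusion result size is the figure quoted there, and the 184-byte block size is what SVGA_QUERY_MEM_BLOCK_SIZE works out to in this tree.

#include <stdio.h>

/* Worked arithmetic for the query memory sizing described above. */
int main(void)
{
   const unsigned block_size = 184;        /* SVGA_QUERY_MEM_BLOCK_SIZE */
   const unsigned num_blocks = 512;        /* blocks in SVGA_QUERY_MEM_SIZE */
   const unsigned occlusion_result = 8;    /* bytes per occlusion query */

   unsigned per_block = block_size / occlusion_result;   /* 23 */
   unsigned total = per_block * num_blocks;              /* 11776, i.e. ~11K */

   printf("%u occlusion queries per block, %u total\n", per_block, total);
   return 0;
}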
struct svga_qmem_alloc_entry
{
@@ -243,31 +244,34 @@ struct svga_qmem_alloc_entry
/**
* Allocate a memory block from the query object memory
- * \return -1 if out of memory, else index of the query memory block
+ * \return NULL if out of memory, else pointer to the query memory block
*/
-static int
+static struct svga_qmem_alloc_entry *
allocate_query_block(struct svga_context *svga)
{
int index;
unsigned offset;
+ struct svga_qmem_alloc_entry *alloc_entry = NULL;
/* Find the next available query block */
index = util_bitmask_add(svga->gb_query_alloc_mask);
if (index == UTIL_BITMASK_INVALID_INDEX)
- return -1;
+ return NULL;
offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
if (offset >= svga->gb_query_len) {
unsigned i;
+ /* Deallocate the out-of-range index */
+ util_bitmask_clear(svga->gb_query_alloc_mask, index);
+ index = -1;
+
/**
* All the memory blocks are allocated, let's see if there is
* any empty memory block around that can be freed up.
*/
- index = -1;
for (i = 0; i < SVGA3D_QUERYTYPE_MAX && index == -1; i++) {
- struct svga_qmem_alloc_entry *alloc_entry;
struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;
alloc_entry = svga->gb_query_map[i];
@@ -286,9 +290,20 @@ allocate_query_block(struct svga_context *svga)
}
}
}
+
+ if (index == -1) {
+ debug_printf("Query memory object is full\n");
+ return NULL;
+ }
}
- return index;
+ if (!alloc_entry) {
+ assert(index != -1);
+ alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+ alloc_entry->block_index = index;
+ }
+
+ return alloc_entry;
}
/**
@@ -346,17 +361,14 @@ allocate_query_block_entry(struct svga_context *svga,
unsigned len)
{
struct svga_qmem_alloc_entry *alloc_entry;
- int block_index = -1;
- block_index = allocate_query_block(svga);
- if (block_index == -1)
- return NULL;
- alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
+ alloc_entry = allocate_query_block(svga);
if (!alloc_entry)
return NULL;
- alloc_entry->block_index = block_index;
- alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
+ assert(alloc_entry->block_index != -1);
+ alloc_entry->start_offset =
+ alloc_entry->block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
alloc_entry->nquery = 0;
alloc_entry->alloc_mask = util_bitmask_create();
alloc_entry->next = NULL;
@@ -508,17 +520,16 @@ define_query_vgpu10(struct svga_context *svga,
sq->gb_query = svga->gb_query;
- /* Allocate an integer ID for this query */
- sq->id = util_bitmask_add(svga->query_id_bm);
- if (sq->id == UTIL_BITMASK_INVALID_INDEX)
- return PIPE_ERROR_OUT_OF_MEMORY;
+ /* Make sure query length is in multiples of 8 bytes */
+ qlen = align(resultLen + sizeof(SVGA3dQueryState), 8);
/* Find a slot for this query in the gb object */
- qlen = resultLen + sizeof(SVGA3dQueryState);
sq->offset = allocate_query(svga, sq->svga_type, qlen);
if (sq->offset == -1)
return PIPE_ERROR_OUT_OF_MEMORY;
+ assert((sq->offset & 7) == 0);
+
SVGA_DBG(DEBUG_QUERY, " query type=%d qid=0x%x offset=%d\n",
sq->svga_type, sq->id, sq->offset);
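The align() call above rounds each query slot length up to an 8-byte boundary before the slot is placed in the guest-backed query buffer. A minimal sketch of that rounding (the usual power-of-two trick, which is what the util helper does for these arguments):

#include <assert.h>
#include <stdio.h>

/* Round x up to the next multiple of a power-of-two alignment, as done by
 * align(resultLen + sizeof(SVGA3dQueryState), 8) above.
 */
static unsigned align_pot(unsigned x, unsigned a)
{
   assert((a & (a - 1)) == 0);        /* alignment must be a power of two */
   return (x + a - 1) & ~(a - 1);
}

int main(void)
{
   printf("%u -> %u\n", 12u, align_pot(12, 8));   /* 12 -> 16 */
   printf("%u -> %u\n", 16u, align_pot(16, 8));   /* 16 -> 16 */
   return 0;
}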
@@ -731,7 +742,19 @@ svga_create_query(struct pipe_context *pipe,
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_SO_STATISTICS:
assert(svga_have_vgpu10(svga));
- sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+
+ /* Until the device supports the new query type for multiple streams,
+ * we will use the single stream query type for stream 0.
+ */
+ if (svga_have_sm5(svga) && index > 0) {
+ assert(index < 4);
+
+ sq->svga_type = SVGA3D_QUERYTYPE_SOSTATS_STREAM0 + index;
+ }
+ else {
+ assert(index == 0);
+ sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
+ }
ret = define_query_vgpu10(svga, sq,
sizeof(SVGADXStreamOutStatisticsQueryResult));
if (ret != PIPE_OK)
@@ -969,7 +992,10 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
assert(!"unexpected query type in svga_begin_query()");
}
- svga->sq[sq->type] = sq;
+ SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d type=%d svga_type=%d\n",
+ __FUNCTION__, sq, sq->id, sq->type, sq->svga_type);
+
+ sq->active = TRUE;
return true;
}
@@ -988,12 +1014,12 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
sq, sq->id);
- if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
+ if (sq->type == PIPE_QUERY_TIMESTAMP && !sq->active)
svga_begin_query(pipe, q);
svga_hwtnl_flush_retry(svga);
- assert(svga->sq[sq->type] == sq);
+ assert(sq->active);
switch (sq->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -1083,7 +1109,7 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
default:
assert(!"unexpected query type in svga_end_query()");
}
- svga->sq[sq->type] = NULL;
+ sq->active = FALSE;
return true;
}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
index 7d5936fa1ec..7764a855391 100644
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -118,6 +118,9 @@ define_rasterizer_object(struct svga_context *svga,
rast->templ.line_stipple_factor : 0;
const uint16 line_pattern = rast->templ.line_stipple_enable ?
rast->templ.line_stipple_pattern : 0;
+ const uint8 pv_last = !rast->templ.flatshade_first &&
+ svgascreen->haveProvokingVertex;
+
unsigned try;
rast->id = util_bitmask_add(svga->rast_object_id_bm);
@@ -194,7 +197,18 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
rast->templ.point_smooth = TRUE;
}
- if (templ->point_smooth) {
+ if (rast->templ.point_smooth &&
+ rast->templ.point_size_per_vertex == 0 &&
+ rast->templ.point_size <= screen->pointSmoothThreshold) {
+ /* If the point size does not exceed the threshold, disable smoothing.
+ * Note that this only affects point rendering when we use the
+ * pipe_rasterizer_state::point_size value, not when the point size
+ * is set in the VS.
+ */
+ rast->templ.point_smooth = FALSE;
+ }
+
+ if (rast->templ.point_smooth) {
/* For smooth points we need to generate fragments for at least
* a 2x2 region. Otherwise the quad we draw may be too small and
* we may generate no fragments at all.
@@ -237,7 +251,7 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
}
}
- if (!svga_have_vgpu10(svga) && templ->point_smooth) {
+ if (!svga_have_vgpu10(svga) && rast->templ.point_smooth) {
rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
rast->need_pipeline_points_str = "smooth points";
}
diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
index 0c6c034751c..380ceaa3aa7 100644
--- a/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -44,6 +44,89 @@ svga_stream_output_target(struct pipe_stream_output_target *s)
return (struct svga_stream_output_target *)s;
}
+
+/**
+ * A helper function to send the appropriate version of the DefineStreamOutput
+ * command depending on whether the device is SM5 capable or not.
+ */
+static enum pipe_error
+svga_define_stream_output(struct svga_context *svga,
+ SVGA3dStreamOutputId soid,
+ uint32 numOutputStreamEntries,
+ uint32 numOutputStreamStrides,
+ uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],
+ const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],
+ uint32 rasterizedStream,
+ struct svga_stream_output *streamout)
+{
+ unsigned i;
+
+ SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid);
+ SVGA_DBG(DEBUG_STREAMOUT,
+ "numOutputStreamEntries=%d\n", numOutputStreamEntries);
+
+ for (i = 0; i < numOutputStreamEntries; i++) {
+ SVGA_DBG(DEBUG_STREAMOUT,
+ " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
+ i, decls[i].outputSlot, decls[i].registerIndex,
+ decls[i].registerMask, decls[i].stream);
+ }
+
+ SVGA_DBG(DEBUG_STREAMOUT,
+ "numOutputStreamStrides=%d\n", numOutputStreamStrides);
+ for (i = 0; i < numOutputStreamStrides; i++) {
+ SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]);
+ }
+ SVGA_DBG(DEBUG_STREAMOUT, "\n");
+
+ if (svga_have_sm5(svga) &&
+ (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||
+ numOutputStreamStrides > 1)) {
+ unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)
+ * numOutputStreamEntries;
+ struct svga_winsys_buffer *declBuf;
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
+ void *map;
+
+ declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,
+ bufSize);
+ if (!declBuf)
+ return PIPE_ERROR;
+ map = sws->buffer_map(sws, declBuf, PIPE_TRANSFER_WRITE);
+ if (!map) {
+ sws->buffer_destroy(sws, declBuf);
+ return PIPE_ERROR;
+ }
+
+ /* copy decls to buffer */
+ memcpy(map, decls, bufSize);
+
+ /* unmap buffer */
+ sws->buffer_unmap(sws, declBuf);
+ streamout->declBuf = declBuf;
+
+ SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput
+ (svga->swc, soid,
+ numOutputStreamEntries,
+ numOutputStreamStrides,
+ streamStrides,
+ streamout->declBuf,
+ rasterizedStream,
+ bufSize));
+ } else {
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,
+ numOutputStreamEntries,
+ streamStrides,
+ decls));
+ }
+
+ return PIPE_OK;
+}
+
+
+/**
+ * Creates stream output from the stream output info.
+ */
struct svga_stream_output *
svga_create_stream_output(struct svga_context *svga,
struct svga_shader *shader,
@@ -52,9 +135,13 @@ svga_create_stream_output(struct svga_context *svga,
struct svga_stream_output *streamout;
SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
+ unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];
+ unsigned numStreamStrides = 0;
+ unsigned numDecls;
unsigned i;
enum pipe_error ret;
unsigned id;
+ ASSERTED unsigned maxDecls;
assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
@@ -64,7 +151,12 @@ svga_create_stream_output(struct svga_context *svga,
if (!svga_have_vgpu10(svga))
return NULL;
- assert(info->num_outputs <= SVGA3D_MAX_STREAMOUT_DECLS);
+ if (svga_have_sm5(svga))
+ maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;
+ else if (svga_have_vgpu10(svga))
+ maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;
+
+ assert(info->num_outputs <= maxDecls);
/* Allocate an integer ID for the stream output */
id = util_bitmask_add(svga->stream_output_id_bm);
@@ -81,15 +173,17 @@ svga_create_stream_output(struct svga_context *svga,
streamout->info = *info;
streamout->id = id;
streamout->pos_out_index = -1;
+ streamout->streammask = 0;
- SVGA_DBG(DEBUG_STREAMOUT, "%s, num_outputs=%d id=%d\n", __FUNCTION__,
- info->num_outputs, id);
-
- /* init whole decls and stride arrays to zero to avoid garbage values */
+ /* Init whole decls and stride arrays to zero to avoid garbage values */
memset(decls, 0, sizeof(decls));
memset(strides, 0, sizeof(strides));
+ memset(dstOffset, 0, sizeof(dstOffset));
+
+ SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",
+ __FUNCTION__, info->num_outputs);
- for (i = 0; i < info->num_outputs; i++) {
+ for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {
unsigned reg_idx = info->output[i].register_index;
unsigned buf_idx = info->output[i].output_buffer;
const enum tgsi_semantic sem_name =
@@ -97,17 +191,59 @@ svga_create_stream_output(struct svga_context *svga,
assert(buf_idx <= PIPE_MAX_SO_BUFFERS);
+ numStreamStrides = MAX2(numStreamStrides, buf_idx);
+
+ SVGA_DBG(DEBUG_STREAMOUT,
+ " %d: register_index=%d output_buffer=%d stream=%d\n",
+ i, reg_idx, buf_idx, info->output[i].stream);
+
+ SVGA_DBG(DEBUG_STREAMOUT,
+ " dst_offset=%d start_component=%d num_components=%d\n",
+ info->output[i].dst_offset,
+ info->output[i].start_component,
+ info->output[i].num_components);
+
+ streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);
+
+ /**
+ * Check if the destination offset of the current output
+ * is at the expected offset. If it is greater, then that means
+ * there is a gap in the stream output. We need to insert
+ * extra declaration entries with an invalid register index
+ * to specify a gap.
+ */
+ while (info->output[i].dst_offset > dstOffset[buf_idx]) {
+
+ unsigned numComponents = info->output[i].dst_offset -
+ dstOffset[buf_idx];
+
+ assert(svga_have_sm5(svga));
+
+ /* We can only specify at most 4 components to skip in each
+ * declaration entry.
+ */
+ numComponents = numComponents > 4 ? 4 : numComponents;
+
+ decls[numDecls].outputSlot = buf_idx;
+ decls[numDecls].stream = info->output[i].stream;
+ decls[numDecls].registerIndex = SVGA3D_INVALID_ID;
+ decls[numDecls].registerMask = (1 << numComponents) - 1;
+
+ dstOffset[buf_idx] += numComponents;
+ numDecls++;
+ }
+
if (sem_name == TGSI_SEMANTIC_POSITION) {
/**
* Check if streaming out POSITION. If so, replace the
* register index with the index for NON_ADJUSTED POSITION.
*/
- decls[i].registerIndex = shader->info.num_outputs;
+ decls[numDecls].registerIndex = shader->info.num_outputs;
/* Save this output index, so we can tell later if this stream output
* includes an output of a vertex position
*/
- streamout->pos_out_index = i;
+ streamout->pos_out_index = numDecls;
}
else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
/**
@@ -116,44 +252,49 @@ svga_create_stream_output(struct svga_context *svga,
* It's valid to write to ClipDistance variable for non-enabled
* clip planes.
*/
- decls[i].registerIndex = shader->info.num_outputs + 1 +
- shader->info.output_semantic_index[reg_idx];
+ decls[numDecls].registerIndex =
+ shader->info.num_outputs + 1 +
+ shader->info.output_semantic_index[reg_idx];
}
else {
- decls[i].registerIndex = reg_idx;
+ decls[numDecls].registerIndex = reg_idx;
}
- decls[i].outputSlot = buf_idx;
- decls[i].registerMask =
+ decls[numDecls].outputSlot = buf_idx;
+ decls[numDecls].registerMask =
((1 << info->output[i].num_components) - 1)
<< info->output[i].start_component;
- SVGA_DBG(DEBUG_STREAMOUT, "%d slot=%d regIdx=%d regMask=0x%x\n",
- i, decls[i].outputSlot, decls[i].registerIndex,
- decls[i].registerMask);
+ decls[numDecls].stream = info->output[i].stream;
+ assert(decls[numDecls].stream == 0 || svga_have_sm5(svga));
+
+ /* Set the bit in streammask for the enabled stream */
+ streamout->streammask |= 1 << info->output[i].stream;
+
+ /* Update the expected offset for the next output */
+ dstOffset[buf_idx] += info->output[i].num_components;
strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
}
- ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
- info->num_outputs,
- strides,
- decls);
+ assert(numDecls <= maxDecls);
+
+ /* Send the DefineStreamOutput command.
+ * Note, rasterizedStream is always 0.
+ */
+ ret = svga_define_stream_output(svga, id,
+ numDecls, numStreamStrides+1,
+ strides, decls, 0, streamout);
+
if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DefineStreamOutput(svga->swc, id,
- info->num_outputs,
- strides,
- decls);
- if (ret != PIPE_OK) {
- util_bitmask_clear(svga->stream_output_id_bm, id);
- FREE(streamout);
- streamout = NULL;
- }
+ util_bitmask_clear(svga->stream_output_id_bm, id);
+ FREE(streamout);
+ streamout = NULL;
}
return streamout;
}
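The gap handling in svga_create_stream_output can be pictured with a standalone sketch: whenever an output's dst_offset is ahead of the running offset for its buffer, filler declarations with an invalid register index are emitted, skipping at most four components each. The structure and names below are illustrative stand-ins for the SVGA3d declaration entries.

#include <stdio.h>

/* Skip components in the streamout buffer by emitting declarations with an
 * invalid register index, at most 4 components per filler entry.
 */
#define INVALID_REG 0xffffffffu

struct decl { unsigned slot, reg, mask; };

int main(void)
{
   struct decl decls[16];
   unsigned ndecls = 0;
   unsigned cur_offset = 2;     /* components already written to buffer 0 */
   unsigned dst_offset = 9;     /* where the next real output wants to land */

   while (dst_offset > cur_offset) {
      unsigned skip = dst_offset - cur_offset;
      if (skip > 4)
         skip = 4;
      decls[ndecls].slot = 0;
      decls[ndecls].reg = INVALID_REG;
      decls[ndecls].mask = (1u << skip) - 1;
      cur_offset += skip;
      ndecls++;
   }

   /* prints 2: one entry skipping 4 components, one skipping 3 */
   printf("%u filler declaration(s) emitted\n", ndecls);
   return 0;
}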
+
enum pipe_error
svga_set_stream_output(struct svga_context *svga,
struct svga_stream_output *streamout)
@@ -168,12 +309,28 @@ svga_set_stream_output(struct svga_context *svga,
streamout, id);
if (svga->current_so != streamout) {
+
+ /* Before unbinding the current stream output, stop the stream output
+ * statistics queries for the active streams.
+ */
+ if (svga_have_sm5(svga) && svga->current_so) {
+ svga->vcount_buffer_stream = svga->current_so->buffer_stream;
+ svga_end_stream_output_queries(svga, svga->current_so->streammask);
+ }
+
enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
if (ret != PIPE_OK) {
return ret;
}
svga->current_so = streamout;
+
+ /* After binding the new stream output, start the stream output
+ * statistics queries for the active streams.
+ */
+ if (svga_have_sm5(svga) && svga->current_so) {
+ svga_begin_stream_output_queries(svga, svga->current_so->streammask);
+ }
}
return PIPE_OK;
@@ -183,17 +340,18 @@ void
svga_delete_stream_output(struct svga_context *svga,
struct svga_stream_output *streamout)
{
- enum pipe_error ret;
+ struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout);
assert(svga_have_vgpu10(svga));
assert(streamout != NULL);
- ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, streamout->id);
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,
+ streamout->id));
+
+ if (svga_have_sm5(svga) && streamout->declBuf) {
+ sws->buffer_destroy(sws, streamout->declBuf);
}
/* Release the ID */
@@ -203,6 +361,7 @@ svga_delete_stream_output(struct svga_context *svga,
FREE(streamout);
}
+
static struct pipe_stream_output_target *
svga_create_stream_output_target(struct pipe_context *pipe,
struct pipe_resource *buffer,
@@ -252,9 +411,9 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
- enum pipe_error ret;
unsigned i;
unsigned num_so_targets;
+ boolean begin_so_queries = num_targets > 0;
SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
num_targets);
@@ -269,6 +428,14 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
sbuf->dirty = TRUE;
}
+ /* Before the currently bound streamout targets are unbound,
+ * save them in case they need to be referenced to retrieve the
+ * number of vertices being streamed out.
+ */
+ for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
+ svga->vcount_so_targets[i] = svga->so_targets[i];
+ }
+
assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
for (i = 0; i < num_targets; i++) {
@@ -283,7 +450,16 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
& SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
svga->so_targets[i] = &sot->base;
- soBindings[i].offset = sot->base.buffer_offset;
+ if (offsets[i] == -1) {
+ soBindings[i].offset = -1;
+
+ /* The streamout is being resumed. There is no need to restart streamout statistics
+ * queries for the draw-auto fallback since those queries are still active.
+ */
+ begin_so_queries = FALSE;
+ }
+ else
+ soBindings[i].offset = sot->base.buffer_offset + offsets[i];
/* The size cannot extend beyond the end of the buffer. Clamp it. */
size = MIN2(sot->base.buffer_size,
@@ -299,15 +475,22 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
}
num_so_targets = MAX2(svga->num_so_targets, num_targets);
- ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
- soBindings, svga->so_surfaces);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
- soBindings, svga->so_surfaces);
- }
-
+ SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
+ soBindings, svga->so_surfaces));
svga->num_so_targets = num_targets;
+
+ if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {
+
+ /* If there are already active queries and we need to start a new streamout,
+ * we need to stop the current active queries first.
+ */
+ if (svga->in_streamout) {
+ svga_end_stream_output_queries(svga, svga->current_so->streammask);
+ }
+
+ /* Start stream out statistics queries for the new streamout */
+ svga_begin_stream_output_queries(svga, svga->current_so->streammask);
+ }
}
/**
@@ -329,6 +512,7 @@ svga_rebind_stream_output_targets(struct svga_context *svga)
return PIPE_OK;
}
+
void
svga_init_stream_output_functions(struct svga_context *svga)
{
@@ -336,3 +520,117 @@ svga_init_stream_output_functions(struct svga_context *svga)
svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
}
+
+
+/**
+ * A helper function to create stream output statistics queries for each stream.
+ * These queries are created as a workaround for DrawTransformFeedbackInstanced or
+ * DrawTransformFeedbackStreamInstanced when auto draw doesn't support
+ * instancing or non-0 stream. In this case, the vertex count will
+ * be retrieved from the stream output statistics query.
+ */
+void
+svga_create_stream_output_queries(struct svga_context *svga)
+{
+ unsigned i;
+
+ if (!svga_have_sm5(svga))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
+ svga->so_queries[i] = svga->pipe.create_query(&svga->pipe,
+ PIPE_QUERY_SO_STATISTICS, i);
+ assert(svga->so_queries[i] != NULL);
+ }
+}
+
+
+/**
+ * Destroy the stream output statistics queries for the draw-auto workaround.
+ */
+void
+svga_destroy_stream_output_queries(struct svga_context *svga)
+{
+ unsigned i;
+
+ if (!svga_have_sm5(svga))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
+ svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]);
+ }
+}
+
+
+/**
+ * Start stream output statistics queries for the active streams.
+ */
+void
+svga_begin_stream_output_queries(struct svga_context *svga,
+ unsigned streammask)
+{
+ assert(svga_have_sm5(svga));
+ assert(!svga->in_streamout);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
+ bool ret;
+ if (streammask & (1 << i)) {
+ ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]);
+ }
+ (void) ret;
+ }
+ svga->in_streamout = TRUE;
+
+ return;
+}
+
+
+/**
+ * Stop stream output statistics queries for the active streams.
+ */
+void
+svga_end_stream_output_queries(struct svga_context *svga,
+ unsigned streammask)
+{
+ assert(svga_have_sm5(svga));
+
+ if (!svga->in_streamout)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
+ bool ret;
+ if (streammask & (1 << i)) {
+ ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]);
+ }
+ (void) ret;
+ }
+ svga->in_streamout = FALSE;
+
+ return;
+}
+
+
+/**
+ * Return the primitive count returned from the stream output statistics query
+ * for the specified stream.
+ */
+unsigned
+svga_get_primcount_from_stream_output(struct svga_context *svga,
+ unsigned stream)
+{
+ unsigned primcount = 0;
+ union pipe_query_result result;
+ bool ret;
+
+ if (svga->current_so) {
+ svga_end_stream_output_queries(svga, svga->current_so->streammask);
+ }
+
+ ret = svga->pipe.get_query_result(&svga->pipe,
+ svga->so_queries[stream],
+ TRUE, &result);
+ if (ret)
+ primcount = result.so_statistics.num_primitives_written;
+
+ return primcount;
+}
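A small sketch of the 4-bits-per-buffer packing used by buffer_stream/vcount_buffer_stream in this file: svga_create_stream_output() records which stream feeds each streamout buffer, and svga_draw_vbo() later unpacks it to choose which statistics query to read. Standalone illustration only, not driver code.

#include <stdio.h>

/* Each streamout buffer's stream id occupies a 4-bit field: buffer i uses
 * bits [i*4, i*4+3] of the packed word.
 */
static unsigned pack_stream(unsigned packed, unsigned buf_idx, unsigned stream)
{
   return packed | (stream << (buf_idx * 4));
}

static unsigned unpack_stream(unsigned packed, unsigned buf_idx)
{
   return (packed >> (buf_idx * 4)) & 0xf;
}

int main(void)
{
   unsigned packed = 0;
   packed = pack_stream(packed, 0, 0);   /* buffer 0 fed by stream 0 */
   packed = pack_stream(packed, 1, 2);   /* buffer 1 fed by stream 2 */

   printf("buffer 0 -> stream %u\n", unpack_stream(packed, 0));
   printf("buffer 1 -> stream %u\n", unpack_stream(packed, 1));
   return 0;
}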
diff --git a/src/gallium/drivers/svga/svga_pipe_ts.c b/src/gallium/drivers/svga/svga_pipe_ts.c
new file mode 100644
index 00000000000..12a3bf486b7
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_ts.c
@@ -0,0 +1,219 @@
+/**********************************************************
+ * Copyright 2018-2020 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_shader.h"
+
+static void
+svga_set_tess_state(struct pipe_context *pipe,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
+{
+ struct svga_context *svga = svga_context(pipe);
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ svga->curr.default_tesslevels[i] = default_outer_level[i];
+ }
+ for (i = 0; i < 2; i++) {
+ svga->curr.default_tesslevels[i + 4] = default_inner_level[i];
+ }
+}
+
+
+static void *
+svga_create_tcs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_tcs_shader *tcs;
+
+ tcs = CALLOC_STRUCT(svga_tcs_shader);
+ if (!tcs)
+ return NULL;
+
+ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATETCS);
+
+ tcs->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+ /* Collect basic info that we'll need later:
+ */
+ tgsi_scan_shader(tcs->base.tokens, &tcs->base.info);
+
+ tcs->base.id = svga->debug.shader_id++;
+
+ tcs->generic_outputs = svga_get_generic_outputs_mask(&tcs->base.info);
+
+ SVGA_STATS_TIME_POP(svga_sws(svga));
+ return tcs;
+}
+
+
+static void
+svga_bind_tcs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_tcs_shader *tcs = (struct svga_tcs_shader *) shader;
+ struct svga_context *svga = svga_context(pipe);
+
+ if (tcs == svga->curr.tcs)
+ return;
+
+ svga->curr.tcs = tcs;
+ svga->dirty |= SVGA_NEW_TCS;
+}
+
+
+static void
+svga_delete_tcs_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_tcs_shader *tcs = (struct svga_tcs_shader *) shader;
+ struct svga_tcs_shader *next_tcs;
+ struct svga_shader_variant *variant, *tmp;
+
+ svga_hwtnl_flush_retry(svga);
+
+ assert(tcs->base.parent == NULL);
+
+ while (tcs) {
+ next_tcs = (struct svga_tcs_shader *)tcs->base.next;
+ for (variant = tcs->base.variants; variant; variant = tmp) {
+ tmp = variant->next;
+
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.tcs) {
+ SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, NULL));
+ svga->state.hw_draw.tcs = NULL;
+ }
+
+ svga_destroy_shader_variant(svga, variant);
+ }
+
+ FREE((void *)tcs->base.tokens);
+ FREE(tcs);
+ tcs = next_tcs;
+ }
+}
+
+
+void
+svga_cleanup_tcs_state(struct svga_context *svga)
+{
+ if (svga->tcs.passthrough_tcs) {
+ svga_delete_tcs_state(&svga->pipe, svga->tcs.passthrough_tcs);
+ }
+}
+
+
+static void *
+svga_create_tes_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_tes_shader *tes;
+
+ tes = CALLOC_STRUCT(svga_tes_shader);
+ if (!tes)
+ return NULL;
+
+ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_CREATETES);
+
+ tes->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+ /* Collect basic info that we'll need later:
+ */
+ tgsi_scan_shader(tes->base.tokens, &tes->base.info);
+
+ tes->base.id = svga->debug.shader_id++;
+
+ tes->generic_inputs = svga_get_generic_inputs_mask(&tes->base.info);
+
+ SVGA_STATS_TIME_POP(svga_sws(svga));
+ return tes;
+}
+
+
+static void
+svga_bind_tes_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_tes_shader *tes = (struct svga_tes_shader *) shader;
+ struct svga_context *svga = svga_context(pipe);
+
+ if (tes == svga->curr.tes)
+ return;
+
+ svga->curr.tes = tes;
+ svga->dirty |= SVGA_NEW_TES;
+}
+
+
+static void
+svga_delete_tes_state(struct pipe_context *pipe, void *shader)
+{
+ struct svga_context *svga = svga_context(pipe);
+ struct svga_tes_shader *tes = (struct svga_tes_shader *) shader;
+ struct svga_tes_shader *next_tes;
+ struct svga_shader_variant *variant, *tmp;
+
+ svga_hwtnl_flush_retry(svga);
+
+ assert(tes->base.parent == NULL);
+
+ while (tes) {
+ next_tes = (struct svga_tes_shader *)tes->base.next;
+ for (variant = tes->base.variants; variant; variant = tmp) {
+ tmp = variant->next;
+
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.tes) {
+ SVGA_RETRY(svga, svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, NULL));
+ svga->state.hw_draw.tes = NULL;
+ }
+
+ svga_destroy_shader_variant(svga, variant);
+ }
+
+ FREE((void *)tes->base.tokens);
+ FREE(tes);
+ tes = next_tes;
+ }
+}
+
+
+void
+svga_init_ts_functions(struct svga_context *svga)
+{
+ svga->pipe.set_tess_state = svga_set_tess_state;
+ svga->pipe.create_tcs_state = svga_create_tcs_state;
+ svga->pipe.bind_tcs_state = svga_bind_tcs_state;
+ svga->pipe.delete_tcs_state = svga_delete_tcs_state;
+ svga->pipe.create_tes_state = svga_create_tes_state;
+ svga->pipe.bind_tes_state = svga_bind_tes_state;
+ svga->pipe.delete_tes_state = svga_delete_tes_state;
+}
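The svga_delete_tcs_state() and svga_delete_tes_state() functions above walk the chain of shaders cloned from the same tokens (base.next) and free every compiled variant, first unbinding a variant from the hardware if it happens to be the one currently set. A minimal, self-contained sketch of that cleanup pattern follows; the types (shader_node, variant_node) are illustrative stand-ins, not the driver's structs.

#include <stdlib.h>

struct variant_node {
   struct variant_node *next;
};

struct shader_node {
   struct shader_node *next;          /* shaders cloned from the same tokens */
   struct variant_node *variants;     /* compiled variants of this shader */
};

/* Free a whole shader chain; clear *bound if a freed variant is the one
 * currently bound, mirroring the unbind-before-destroy step above. */
static void
delete_shader_chain(struct shader_node *s, struct variant_node **bound)
{
   while (s) {
      struct shader_node *next_s = s->next;
      struct variant_node *v = s->variants;
      while (v) {
         struct variant_node *next_v = v->next;
         if (*bound == v)
            *bound = NULL;
         free(v);
         v = next_v;
      }
      free(s);
      s = next_s;
   }
}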
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index a475e000f2e..aa7396c2c6b 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -166,6 +166,7 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)
{
struct svga_context *svga = svga_context(pipe);
struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+ struct svga_vertex_shader *next_vs;
struct svga_shader_variant *variant, *tmp;
enum pipe_error ret;
@@ -173,37 +174,42 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)
assert(vs->base.parent == NULL);
- /* Check if there is a generated geometry shader to go with this
- * vertex shader. If there is, then delete the geometry shader as well.
- */
- if (vs->gs != NULL) {
- svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
- }
+ while (vs) {
+ next_vs = (struct svga_vertex_shader *)vs->base.next;
- if (vs->base.stream_output != NULL)
- svga_delete_stream_output(svga, vs->base.stream_output);
+ /* Check if there is a generated geometry shader to go with this
+ * vertex shader. If there is, then delete the geometry shader as well.
+ */
+ if (vs->gs != NULL) {
+ svga->pipe.delete_gs_state(&svga->pipe, vs->gs);
+ }
- draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
+ if (vs->base.stream_output != NULL)
+ svga_delete_stream_output(svga, vs->base.stream_output);
- for (variant = vs->base.variants; variant; variant = tmp) {
- tmp = variant->next;
+ draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
- /* Check if deleting currently bound shader */
- if (variant == svga->state.hw_draw.vs) {
- ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
+ for (variant = vs->base.variants; variant; variant = tmp) {
+ tmp = variant->next;
+
+ /* Check if deleting currently bound shader */
+ if (variant == svga->state.hw_draw.vs) {
ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
- assert(ret == PIPE_OK);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
+ assert(ret == PIPE_OK);
+ }
+ svga->state.hw_draw.vs = NULL;
}
- svga->state.hw_draw.vs = NULL;
+
+ svga_destroy_shader_variant(svga, variant);
}
- svga_destroy_shader_variant(svga, variant);
+ FREE((void *)vs->base.tokens);
+ FREE(vs);
+ vs = next_vs;
}
-
- FREE((void *)vs->base.tokens);
- FREE(vs);
}
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index 6629a8cc14d..4f19b8ca035 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -53,7 +53,8 @@ svga_buffer_needs_hw_storage(const struct svga_screen *ss,
const struct pipe_resource *template)
{
unsigned bind_mask = (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT);
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
+ PIPE_BIND_SHADER_BUFFER | PIPE_BIND_COMMAND_ARGS_BUFFER);
if (ss->sws->have_vgpu10) {
/*
@@ -478,6 +479,9 @@ svga_buffer_create(struct pipe_screen *screen,
*/
bind_flags |= (PIPE_BIND_VERTEX_BUFFER |
PIPE_BIND_INDEX_BUFFER);
+
+ /* It may be used as a shader resource as well. */
+ bind_flags |= PIPE_BIND_SAMPLER_VIEW;
}
if (svga_buffer_create_host_surface(ss, sbuf, bind_flags) != PIPE_OK)
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 2e9ca060059..5d2b934e7c1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -175,6 +175,11 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
if (bind_flags & PIPE_BIND_SAMPLER_VIEW)
sbuf->key.flags |= SVGA3D_SURFACE_BIND_SHADER_RESOURCE;
+ if (bind_flags & PIPE_BIND_COMMAND_ARGS_BUFFER) {
+ assert(ss->sws->have_sm5);
+ sbuf->key.flags |= SVGA3D_SURFACE_DRAWINDIRECT_ARGS;
+ }
+
if (!bind_flags && sbuf->b.b.usage == PIPE_USAGE_STAGING) {
/* This surface is to be used with the
* SVGA3D_CMD_DX_TRANSFER_FROM_BUFFER command, and no other
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 2aa4e52faa7..1bae8c39595 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -133,26 +133,25 @@ svga_transfer_dma(struct svga_context *svga,
}
}
else {
- int y, h, y_max;
+ int y, h, srcy;
unsigned blockheight =
util_format_get_blockheight(st->base.resource->format);
h = st->hw_nblocksy * blockheight;
- y_max = st->box.y + st->box.h;
+ srcy = 0;
- for (y = st->box.y; y < y_max; y += h) {
+ for (y = 0; y < st->box.h; y += h) {
unsigned offset, length;
void *hw, *sw;
- if (y + h > y_max)
- h = y_max - y;
+ if (y + h > st->box.h)
+ h = st->box.h - y;
/* Transfer band must be aligned to pixel block boundaries */
assert(y % blockheight == 0);
assert(h % blockheight == 0);
- /* First band starts at the top of the SW buffer. */
- offset = (y - st->box.y) * st->base.stride / blockheight;
+ offset = y * st->base.stride / blockheight;
length = h * st->base.stride / blockheight;
sw = (uint8_t *) st->swbuf + offset;
@@ -160,9 +159,9 @@ svga_transfer_dma(struct svga_context *svga,
if (transfer == SVGA3D_WRITE_HOST_VRAM) {
unsigned usage = PIPE_TRANSFER_WRITE;
- /* Don't write to an in-flight DMA buffer. Synchronize or
- * discard in-flight storage. */
- if (y != st->box.y) {
+ /* Wait for the previous DMAs to complete */
+ /* TODO: keep one DMA (at half the size) in the background */
+ if (y) {
svga_context_flush(svga, NULL);
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
}
@@ -178,7 +177,7 @@ svga_transfer_dma(struct svga_context *svga,
svga_transfer_dma_band(svga, st, transfer,
st->box.x, y, st->box.z,
st->box.w, h, st->box.d,
- 0, 0, 0, flags);
+ 0, srcy, 0, flags);
/*
* Prevent the texture contents to be discarded on the next band
@@ -488,6 +487,18 @@ svga_texture_transfer_map_direct(struct svga_context *svga,
svga_context_flush(svga, NULL);
}
+ if (map && rebind) {
+ enum pipe_error ret;
+
+ ret = SVGA3D_BindGBSurface(swc, surf);
+ if (ret != PIPE_OK) {
+ svga_context_flush(svga, NULL);
+ ret = SVGA3D_BindGBSurface(swc, surf);
+ assert(ret == PIPE_OK);
+ }
+ svga_context_flush(svga, NULL);
+ }
+
/*
* Make sure we return NULL if the map fails
*/
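The rebind hunk above uses the driver's usual flush-and-retry idiom: if a command cannot be queued because the command buffer is full, flush and try exactly once more (this is what the SVGA_RETRY macro wraps elsewhere in the series). A hypothetical, self-contained sketch of that idiom, with stand-in names rather than the driver's API:

enum cmd_status { CMD_OK, CMD_NO_SPACE };

static unsigned buffer_space = 0;                /* pretend command buffer */

static enum cmd_status try_emit(void) { return buffer_space ? CMD_OK : CMD_NO_SPACE; }
static void flush(void) { buffer_space = 64; }   /* flushing frees space */

/* Emit a command; on a full buffer, flush and try exactly once more. */
static enum cmd_status
emit_with_retry(void)
{
   enum cmd_status ret = try_emit();
   if (ret != CMD_OK) {
      flush();
      ret = try_emit();
   }
   return ret;
}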
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 2975bfefdfa..f7e3a900290 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -49,6 +49,10 @@
/* NOTE: this constant may get moved into a svga3d*.h header file */
#define SVGA3D_DX_MAX_RESOURCE_SIZE (128 * 1024 * 1024)
+#ifndef MESA_GIT_SHA1
+#define MESA_GIT_SHA1 "(unknown git revision)"
+#endif
+
#ifdef DEBUG
int SVGA_DEBUG = 0;
@@ -249,7 +253,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
12 /* 2048x2048 */);
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return sws->have_vgpu10 ? SVGA3D_MAX_SURFACE_ARRAYSIZE : 0;
+ return sws->have_sm5 ? SVGA3D_SM5_MAX_SURFACE_ARRAYSIZE :
+ (sws->have_vgpu10 ? SVGA3D_SM4_MAX_SURFACE_ARRAYSIZE : 0);
case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
return 1;
@@ -266,7 +271,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
return 1; /* The color outputs of vertex shaders are not clamped */
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
- return 0; /* The driver can't clamp vertex colors */
+ return sws->have_vgpu10;
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
return 0; /* The driver can't clamp fragment colors */
@@ -274,10 +279,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 1; /* expected for GL_ARB_framebuffer_object */
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return sws->have_vgpu10 ? 330 : 120;
+ if (sws->have_sm5) {
+ return 410;
+ } else if (sws->have_vgpu10) {
+ return 330;
+ } else {
+ return 120;
+ }
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- return sws->have_vgpu10 ? 330 : 120;
+ return sws->have_sm5 ? 410 : (sws->have_vgpu10 ? 330 : 120);
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
@@ -303,10 +314,12 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
return sws->have_vgpu10 ? 4 : 0;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- return sws->have_vgpu10 ? SVGA3D_MAX_STREAMOUT_DECLS : 0;
+ return sws->have_sm5 ? SVGA3D_MAX_STREAMOUT_DECLS :
+ (sws->have_vgpu10 ? SVGA3D_MAX_DX10_STREAMOUT_DECLS : 0);
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return sws->have_sm5;
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
- return 0;
+ return sws->have_sm5;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
return svgascreen->ms_samples ? 1 : 0;
@@ -350,7 +363,16 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return sws->have_sm4_1;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */
+ /* SM4_1 supports only single-channel textures whereas SM5 supports
+ * all four-channel textures */
+ return sws->have_sm5 ? 4 :
+ (sws->have_sm4_1 ? 1 : 0);
+ case PIPE_CAP_DRAW_INDIRECT:
+ return sws->have_sm5;
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ return sws->have_sm5 ? 4 : 0;
+ case PIPE_CAP_COMPUTE:
+ return 0;
case PIPE_CAP_MAX_VARYINGS:
return sws->have_vgpu10 ? VGPU10_MAX_FS_INPUTS : 10;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
@@ -362,9 +384,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_TEXTURE_BARRIER:
- case PIPE_CAP_MAX_VERTEX_STREAMS:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_COMPUTE:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -372,7 +392,6 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
- case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
@@ -410,7 +429,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
+ assert((!sws->have_vgpu10 && svgascreen->max_viewports == 1) ||
+ (sws->have_vgpu10 &&
+ svgascreen->max_viewports == SVGA3D_DX_MAX_VIEWPORTS));
+ return svgascreen->max_viewports;
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
@@ -427,10 +449,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return sws->have_vgpu10;
case PIPE_CAP_CLEAR_TEXTURE:
return sws->have_vgpu10;
+ case PIPE_CAP_DOUBLES:
+ return sws->have_sm5;
case PIPE_CAP_UMA:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
- case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
@@ -453,7 +476,6 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
- case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
@@ -487,6 +509,9 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
return 32;
case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
return 1 << 27;
+ /* Verify this once the protocol is finalized. Set to the minimum value for now. */
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return sws->have_sm5 ? 30 : 0;
default:
return u_pipe_screen_get_param_defaults(screen, param);
}
@@ -674,12 +699,12 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
assert(sws->have_vgpu10);
(void) sws; /* silence unused var warnings in non-debug builds */
- /* Only VS, GS, FS supported */
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_GEOMETRY &&
- shader != PIPE_SHADER_FRAGMENT) {
+ if ((!sws->have_sm5) &&
+ (shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL))
+ return 0;
+
+ if (shader == PIPE_SHADER_COMPUTE)
return 0;
- }
/* NOTE: we do not query the device for any caps/limits at this time */
@@ -697,6 +722,10 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
return VGPU10_MAX_FS_INPUTS;
else if (shader == PIPE_SHADER_GEOMETRY)
return VGPU10_MAX_GS_INPUTS;
+ else if (shader == PIPE_SHADER_TESS_CTRL)
+ return VGPU11_MAX_HS_INPUTS;
+ else if (shader == PIPE_SHADER_TESS_EVAL)
+ return VGPU11_MAX_DS_INPUT_CONTROL_POINTS;
else
return VGPU10_MAX_VS_INPUTS;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
@@ -704,6 +733,10 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
return VGPU10_MAX_FS_OUTPUTS;
else if (shader == PIPE_SHADER_GEOMETRY)
return VGPU10_MAX_GS_OUTPUTS;
+ else if (shader == PIPE_SHADER_TESS_CTRL)
+ return VGPU11_MAX_HS_OUTPUTS;
+ else if (shader == PIPE_SHADER_TESS_EVAL)
+ return VGPU11_MAX_DS_OUTPUTS;
else
return VGPU10_MAX_VS_OUTPUTS;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
@@ -844,6 +877,8 @@ svga_get_driver_query_info(struct pipe_screen *screen,
PIPE_DRIVER_QUERY_TYPE_UINT64),
QUERY("num-bytes-uploaded", SVGA_QUERY_NUM_BYTES_UPLOADED,
PIPE_DRIVER_QUERY_TYPE_BYTES),
+ QUERY("num-command-buffers", SVGA_QUERY_NUM_COMMAND_BUFFERS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
QUERY("command-buffer-size", SVGA_QUERY_COMMAND_BUFFER_SIZE,
PIPE_DRIVER_QUERY_TYPE_BYTES),
QUERY("flush-time", SVGA_QUERY_FLUSH_TIME,
@@ -860,6 +895,10 @@ svga_get_driver_query_info(struct pipe_screen *screen,
PIPE_DRIVER_QUERY_TYPE_UINT64),
QUERY("num-const-updates", SVGA_QUERY_NUM_CONST_UPDATES,
PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-shader-relocations", SVGA_QUERY_NUM_SHADER_RELOCATIONS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-surface-relocations", SVGA_QUERY_NUM_SURFACE_RELOCATIONS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
/* running total counters */
QUERY("memory-used", SVGA_QUERY_MEMORY_USED,
@@ -878,6 +917,8 @@ svga_get_driver_query_info(struct pipe_screen *screen,
PIPE_DRIVER_QUERY_TYPE_UINT64),
QUERY("num-commands-per-draw", SVGA_QUERY_NUM_COMMANDS_PER_DRAW,
PIPE_DRIVER_QUERY_TYPE_FLOAT),
+ QUERY("shader-mem-used", SVGA_QUERY_SHADER_MEM_USED,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
};
#undef QUERY
@@ -1012,9 +1053,10 @@ svga_screen_create(struct svga_winsys_screen *sws)
goto error2;
}
- debug_printf("%s enabled = %u\n",
- sws->have_sm4_1 ? "SM4_1" : "VGPU10",
- sws->have_sm4_1 ? 1 : sws->have_vgpu10);
+ debug_printf("%s enabled\n",
+ sws->have_sm5 ? "SM5" :
+ sws->have_sm4_1 ? "SM4_1" :
+ sws->have_vgpu10 ? "VGPU10" : "VGPU9");
debug_printf("Mesa: %s %s (%s)\n", svga_get_name(screen),
PACKAGE_VERSION, MESA_GIT_SHA1);
@@ -1081,13 +1123,23 @@ svga_screen_create(struct svga_winsys_screen *sws)
svgascreen->ms_samples |= 1 << 3;
}
+ if (sws->have_sm5 && debug_get_bool_option("SVGA_MSAA", TRUE)) {
+ if (get_bool_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_8X, FALSE))
+ svgascreen->ms_samples |= 1 << 7;
+ }
+
/* Maximum number of constant buffers */
svgascreen->max_const_buffers =
get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1);
svgascreen->max_const_buffers = MIN2(svgascreen->max_const_buffers,
SVGA_MAX_CONST_BUFS);
+ svgascreen->haveBlendLogicops =
+ get_bool_cap(sws, SVGA3D_DEVCAP_LOGIC_BLENDOPS, FALSE);
+
screen->is_format_supported = svga_is_dx_format_supported;
+
+ svgascreen->max_viewports = SVGA3D_DX_MAX_VIEWPORTS;
}
else {
/* VGPU9 */
@@ -1122,6 +1174,9 @@ svga_screen_create(struct svga_winsys_screen *sws)
/* No multisampling */
svgascreen->ms_samples = 0;
+
+ /* Only one viewport */
+ svgascreen->max_viewports = 1;
}
/* common VGPU9 / VGPU10 caps */
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index 12b93468da2..aa0001b11e5 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -50,10 +50,13 @@ struct svga_screen
/** Device caps */
boolean haveProvokingVertex;
boolean haveLineStipple, haveLineSmooth;
+ boolean haveBlendLogicops;
float maxLineWidth, maxLineWidthAA;
float maxPointSize;
+ float pointSmoothThreshold; /** Disable point AA for sizes less than this */
unsigned max_color_buffers;
unsigned max_const_buffers;
+ unsigned max_viewports;
unsigned ms_samples;
struct {
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index b5bcd51a7fc..a0e5f5ff2b9 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -311,6 +311,9 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
}
+/* Maximum number of invalidate surface commands in a command buffer */
+# define SVGA_MAX_SURFACE_TO_INVALIDATE 1000
+
/**
* Called during the screen flush to move all buffers not in a validate list
* into the unused list.
@@ -354,6 +357,7 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
next = curr->next;
}
+ unsigned nsurf = 0;
curr = cache->validated.next;
next = curr->next;
while (curr != &cache->validated) {
@@ -381,12 +385,14 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
* this function itself is called inside svga_context_flush().
*/
svga->swc->flush(svga->swc, NULL);
+ nsurf = 0;
ret = SVGA3D_InvalidateGBSurface(svga->swc, entry->handle);
assert(ret == PIPE_OK);
}
/* add the entry to the invalidated list */
list_add(&entry->head, &cache->invalidated);
+ nsurf++;
}
curr = next;
@@ -394,6 +400,16 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
}
mtx_unlock(&cache->mutex);
+
+ /**
+ * In some rare cases (when running ARK survival), we hit the max number
+ * of surface relocations with invalidated surfaces during context flush.
+ * So if the number of invalidated surfaces exceeds a certain limit (1000),
+ * we'll do another winsys flush.
+ */
+ if (nsurf > SVGA_MAX_SURFACE_TO_INVALIDATE) {
+ svga->swc->flush(svga->swc, NULL);
+ }
}
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index 3a7516945c6..52f1153fd61 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -229,22 +229,25 @@ static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
*/
void
svga_init_shader_key_common(const struct svga_context *svga,
- enum pipe_shader_type shader,
+ enum pipe_shader_type shader_type,
+ const struct svga_shader *shader,
struct svga_compile_key *key)
{
unsigned i, idx = 0;
- assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
+ assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
/* In case the number of samplers and sampler_views doesn't match,
* loop over the lower of the two counts.
*/
- key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
- svga->curr.num_samplers[shader]);
+ key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
+ svga->curr.num_samplers[shader_type]);
for (i = 0; i < key->num_textures; i++) {
- struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
- const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+ struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
+ const struct svga_sampler_state
+ *sampler = svga->curr.sampler[shader_type][i];
+
if (view) {
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
@@ -304,6 +307,12 @@ svga_init_shader_key_common(const struct svga_context *svga,
if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
view->texture->format == PIPE_FORMAT_DXT1_SRGB)
swizzle_tab = set_alpha;
+
+ /* Save the compare function as we need to handle
+ * depth compare in the shader.
+ */
+ key->tex[i].compare_mode = sampler->compare_mode;
+ key->tex[i].compare_func = sampler->compare_func;
}
key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
@@ -314,8 +323,10 @@ svga_init_shader_key_common(const struct svga_context *svga,
if (sampler) {
if (!sampler->normalized_coords) {
- assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
- key->tex[i].width_height_idx = idx++;
+ if (view) {
+ assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
+ key->tex[i].width_height_idx = idx++;
+ }
key->tex[i].unnormalized = TRUE;
++key->num_unnormalized_coords;
@@ -326,6 +337,9 @@ svga_init_shader_key_common(const struct svga_context *svga,
}
}
}
+
+ key->clamp_vertex_color = svga->curr.rast ?
+ svga->curr.rast->templ.clamp_vertex_color : 0;
}
@@ -380,6 +394,8 @@ define_gb_shader_vgpu9(struct svga_context *svga,
variant->gb_shader = sws->shader_create(sws, variant->type,
variant->tokens, codeLen);
+ svga->hud.shader_mem_used += codeLen;
+
if (!variant->gb_shader)
return PIPE_ERROR_OUT_OF_MEMORY;
@@ -398,6 +414,7 @@ define_gb_shader_vgpu10(struct svga_context *svga,
{
struct svga_winsys_context *swc = svga->swc;
enum pipe_error ret;
+ unsigned len = codeLen + variant->signatureLen;
/**
* Shaders in VGPU10 enabled device reside in the device COTable.
@@ -412,7 +429,11 @@ define_gb_shader_vgpu10(struct svga_context *svga,
/* Create gb memory for the shader and upload the shader code */
variant->gb_shader = swc->shader_create(swc,
variant->id, variant->type,
- variant->tokens, codeLen);
+ variant->tokens, codeLen,
+ variant->signature,
+ variant->signatureLen);
+
+ svga->hud.shader_mem_used += len;
if (!variant->gb_shader) {
/* Free the shader ID */
@@ -429,7 +450,8 @@ define_gb_shader_vgpu10(struct svga_context *svga,
* the shader creation and return an error.
*/
ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
- variant->id, variant->type, codeLen);
+ variant->id, variant->type,
+ len);
if (ret != PIPE_OK)
goto fail;
@@ -511,7 +533,10 @@ svga_set_shader(struct svga_context *svga,
assert(type == SVGA3D_SHADERTYPE_VS ||
type == SVGA3D_SHADERTYPE_GS ||
- type == SVGA3D_SHADERTYPE_PS);
+ type == SVGA3D_SHADERTYPE_PS ||
+ type == SVGA3D_SHADERTYPE_HS ||
+ type == SVGA3D_SHADERTYPE_DS ||
+ type == SVGA3D_SHADERTYPE_CS);
if (svga_have_gb_objects(svga)) {
struct svga_winsys_gb_shader *gbshader =
@@ -533,7 +558,27 @@ svga_set_shader(struct svga_context *svga,
struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
- struct svga_shader_variant *variant = CALLOC_STRUCT(svga_shader_variant);
+ struct svga_shader_variant *variant;
+
+ switch (type) {
+ case PIPE_SHADER_FRAGMENT:
+ variant = CALLOC(1, sizeof(struct svga_fs_variant));
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ variant = CALLOC(1, sizeof(struct svga_gs_variant));
+ break;
+ case PIPE_SHADER_VERTEX:
+ variant = CALLOC(1, sizeof(struct svga_vs_variant));
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ variant = CALLOC(1, sizeof(struct svga_tes_variant));
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ variant = CALLOC(1, sizeof(struct svga_tcs_variant));
+ break;
+ default:
+ return NULL;
+ }
if (variant) {
variant->type = svga_shader_type(type);
@@ -547,19 +592,11 @@ void
svga_destroy_shader_variant(struct svga_context *svga,
struct svga_shader_variant *variant)
{
- enum pipe_error ret = PIPE_OK;
-
if (svga_have_gb_objects(svga) && variant->gb_shader) {
if (svga_have_vgpu10(svga)) {
struct svga_winsys_context *swc = svga->swc;
swc->shader_destroy(swc, variant->gb_shader);
- ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
util_bitmask_clear(svga->shader_id_bm, variant->id);
}
else {
@@ -570,17 +607,13 @@ svga_destroy_shader_variant(struct svga_context *svga,
}
else {
if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
- ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type);
- if (ret != PIPE_OK) {
- /* flush and try again */
- svga_context_flush(svga, NULL);
- ret = SVGA3D_DestroyShader(svga->swc, variant->id, variant->type);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
+ variant->type));
util_bitmask_clear(svga->shader_id_bm, variant->id);
}
}
+ FREE(variant->signature);
FREE((unsigned *)variant->tokens);
FREE(variant);
@@ -612,6 +645,8 @@ svga_rebind_shaders(struct svga_context *svga)
svga->rebind.flags.vs = 0;
svga->rebind.flags.gs = 0;
svga->rebind.flags.fs = 0;
+ svga->rebind.flags.tcs = 0;
+ svga->rebind.flags.tes = 0;
return PIPE_OK;
}
@@ -637,5 +672,19 @@ svga_rebind_shaders(struct svga_context *svga)
}
svga->rebind.flags.fs = 0;
+ if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
+ ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ svga->rebind.flags.tcs = 0;
+
+ if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
+ ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+ svga->rebind.flags.tes = 0;
+
return PIPE_OK;
}
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index 67f6b5aeb63..31ccf97d39a 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -68,6 +68,8 @@ struct svga_compile_key
unsigned need_prescale:1;
unsigned writes_psize:1;
unsigned wide_point:1;
+ unsigned writes_viewport_index:1;
+ unsigned num_prescale:5;
} gs;
/* fragment shader only */
@@ -83,15 +85,42 @@ struct svga_compile_key
unsigned alpha_func:4; /**< SVGA3D_CMP_x */
unsigned write_color0_to_n_cbufs:4;
unsigned aa_point:1;
+ unsigned layer_to_zero:1;
int aa_point_coord_index;
float alpha_ref;
} fs;
+ /* tessellation control shader */
+ struct {
+ unsigned vertices_per_patch:8;
+ enum pipe_prim_type prim_mode:8;
+ enum pipe_tess_spacing spacing:3;
+ unsigned vertices_order_cw:1;
+ unsigned point_mode:1;
+ unsigned passthrough:1;
+ } tcs;
+
+ /* tessellation evaluation shader */
+ struct {
+ unsigned vertices_per_patch:8;
+ unsigned tessfactor_index:8;
+ unsigned need_prescale:1;
+ unsigned need_tessouter:1;
+ unsigned need_tessinner:1;
+ } tes;
+
+ /* compute shader */
+ struct {
+ unsigned grid_size[3];
+ } cs;
+
/* any shader type */
int8_t generic_remap_table[MAX_GENERIC_VARYING];
unsigned num_textures:8;
unsigned num_unnormalized_coords:8;
unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
+ unsigned last_vertex_stage:1;
+ unsigned clamp_vertex_color:1;
unsigned sprite_origin_lower_left:1;
uint16_t sprite_coord_enable;
struct {
@@ -121,6 +150,10 @@ struct svga_token_key {
unsigned writes_psize:1;
unsigned aa_point:1;
} gs;
+ struct {
+ unsigned write_position:1;
+ } vs;
+ unsigned dynamic_indexing:1;
};
/**
@@ -143,6 +176,10 @@ struct svga_shader_variant
const unsigned *tokens;
unsigned nr_tokens;
+ /* shader signature */
+ unsigned signatureLen;
+ SVGA3dDXShaderSignatureHeader *signature;
+
/** Per-context shader identifier used with SVGA_3D_CMD_SHADER_DEFINE,
* SVGA_3D_CMD_SET_SHADER and SVGA_3D_CMD_SHADER_DESTROY.
*/
@@ -154,6 +191,18 @@ struct svga_shader_variant
/* GB object buffer containing the bytecode */
struct svga_winsys_gb_shader *gb_shader;
+ /** Next variant */
+ struct svga_shader_variant *next;
+};
+
+
+/**
+ * Shader variant for fragment shader
+ */
+struct svga_fs_variant
+{
+ struct svga_shader_variant base;
+
boolean uses_flat_interp; /** TRUE if flat interpolation qualifier is
* applied to any of the varyings.
*/
@@ -168,9 +217,56 @@ struct svga_shader_variant
/** For FS-based polygon stipple */
unsigned pstipple_sampler_unit;
+};
- /** Next variant */
- struct svga_shader_variant *next;
+
+/**
+ * Shader variant for geometry shader
+ */
+struct svga_gs_variant
+{
+ struct svga_shader_variant base;
+};
+
+
+/**
+ * Shader variant for vertex shader
+ */
+struct svga_vs_variant
+{
+ struct svga_shader_variant base;
+};
+
+
+/**
+ * Shader variant for tessellation evaluation shader
+ */
+struct svga_tes_variant
+{
+ struct svga_shader_variant base;
+
+ enum pipe_prim_type prim_mode:8;
+ enum pipe_tess_spacing spacing:3;
+ unsigned vertices_order_cw:1;
+ unsigned point_mode:1;
+};
+
+
+/**
+ * Shader variant for tessellation control shader
+ */
+struct svga_tcs_variant
+{
+ struct svga_shader_variant base;
+};
+
+
+/**
+ * Shader variant for compute shader
+ */
+struct svga_cs_variant
+{
+ struct svga_shader_variant base;
};
@@ -237,6 +333,30 @@ struct svga_geometry_shader
};
+struct svga_tcs_shader
+{
+ struct svga_shader base;
+
+ /** Mask of which generic varying variables are written by this shader */
+ uint64_t generic_outputs;
+};
+
+
+struct svga_tes_shader
+{
+ struct svga_shader base;
+
+ /** Mask of which generic varying variables are written by this shader */
+ uint64_t generic_inputs;
+};
+
+
+struct svga_compute_shader
+{
+ struct svga_shader base;
+};
+
+
static inline boolean
svga_compile_keys_equal(const struct svga_compile_key *a,
const struct svga_compile_key *b)
@@ -264,7 +384,8 @@ svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
void
svga_init_shader_key_common(const struct svga_context *svga,
- enum pipe_shader_type shader,
+ enum pipe_shader_type shader_type,
+ const struct svga_shader *shader,
struct svga_compile_key *key);
struct svga_shader_variant *
@@ -328,6 +449,12 @@ svga_shader_type(enum pipe_shader_type shader)
return SVGA3D_SHADERTYPE_GS;
case PIPE_SHADER_FRAGMENT:
return SVGA3D_SHADERTYPE_PS;
+ case PIPE_SHADER_TESS_CTRL:
+ return SVGA3D_SHADERTYPE_HS;
+ case PIPE_SHADER_TESS_EVAL:
+ return SVGA3D_SHADERTYPE_DS;
+ case PIPE_SHADER_COMPUTE:
+ return SVGA3D_SHADERTYPE_CS;
default:
assert(!"Invalid shader type");
return SVGA3D_SHADERTYPE_VS;
@@ -351,4 +478,39 @@ svga_have_gs_streamout(const struct svga_context *svga)
}
+static inline struct svga_fs_variant *
+svga_fs_variant(struct svga_shader_variant *variant)
+{
+ assert(!variant || variant->type == SVGA3D_SHADERTYPE_PS);
+ return (struct svga_fs_variant *)variant;
+}
+
+
+static inline struct svga_tes_variant *
+svga_tes_variant(struct svga_shader_variant *variant)
+{
+ assert(!variant || variant->type == SVGA3D_SHADERTYPE_DS);
+ return (struct svga_tes_variant *)variant;
+}
+
+
+static inline struct svga_cs_variant *
+svga_cs_variant(struct svga_shader_variant *variant)
+{
+ assert(!variant || variant->type == SVGA3D_SHADERTYPE_CS);
+ return (struct svga_cs_variant *)variant;
+}
+
+
+/* Returns TRUE if we are currently using flat shading.
+ */
+static inline boolean
+svga_is_using_flat_shading(const struct svga_context *svga)
+{
+ return
+ svga->state.hw_draw.fs ?
+ svga_fs_variant(svga->state.hw_draw.fs)->uses_flat_interp : FALSE;
+}
+
+
#endif /* SVGA_SHADER_H */
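The per-stage variant structs above use a simple "subclassing" scheme: the common svga_shader_variant is embedded as the first member, and inline helpers such as svga_fs_variant() downcast a base pointer after asserting its type. A minimal, compilable sketch of that pattern with stand-in names (not the driver's types):

#include <assert.h>

enum stage_type { STAGE_PS, STAGE_DS };

struct variant_base {
   enum stage_type type;
};

struct fs_variant {
   struct variant_base base;   /* must be the first member */
   int uses_flat_interp;
};

/* The downcast is safe because 'base' is the first member, so the derived
 * struct and its base share the same address. */
static inline struct fs_variant *
as_fs_variant(struct variant_base *v)
{
   assert(!v || v->type == STAGE_PS);
   return (struct fs_variant *) v;
}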
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
index dad78389a23..ad647d8784c 100644
--- a/src/gallium/drivers/svga/svga_state.c
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -60,19 +60,40 @@ static const struct svga_tracked_state *hw_clear_state[] =
};
-/* Atoms to update hardware state prior to emitting a draw packet.
+/**
+ * Atoms to update hardware state prior to emitting a draw packet
+ * for a VGPU9 device.
*/
-static const struct svga_tracked_state *hw_draw_state[] =
+static const struct svga_tracked_state *hw_draw_state_vgpu9[] =
+{
+ &svga_hw_fs,
+ &svga_hw_vs,
+ &svga_hw_rss,
+ &svga_hw_tss,
+ &svga_hw_tss_binding,
+ &svga_hw_clip_planes,
+ &svga_hw_vdecl,
+ &svga_hw_fs_constants,
+ &svga_hw_vs_constants,
+ NULL
+};
+
+
+/**
+ * Atoms to update hardware state prior to emitting a draw packet
+ * for a VGPU10 device.
+ * The geometry shader is new to VGPU10.
+ * TSS and TSS bindings are replaced by samplers and sampler bindings.
+ */
+static const struct svga_tracked_state *hw_draw_state_vgpu10[] =
{
&svga_need_tgsi_transform,
&svga_hw_fs,
&svga_hw_gs,
&svga_hw_vs,
&svga_hw_rss,
- &svga_hw_sampler, /* VGPU10 */
- &svga_hw_sampler_bindings, /* VGPU10 */
- &svga_hw_tss, /* pre-VGPU10 */
- &svga_hw_tss_binding, /* pre-VGPU10 */
+ &svga_hw_sampler,
+ &svga_hw_sampler_bindings,
&svga_hw_clip_planes,
&svga_hw_vdecl,
&svga_hw_fs_constants,
@@ -82,6 +103,33 @@ static const struct svga_tracked_state *hw_draw_state[] =
};
+/**
+ * Atoms to update hardware state prior to emitting a draw packet
+ * for an SM5 device.
+ * TCS and TES shaders are new to the SM5 device.
+ */
+static const struct svga_tracked_state *hw_draw_state_sm5[] =
+{
+ &svga_need_tgsi_transform,
+ &svga_hw_fs,
+ &svga_hw_gs,
+ &svga_hw_tes,
+ &svga_hw_tcs,
+ &svga_hw_vs,
+ &svga_hw_rss,
+ &svga_hw_sampler,
+ &svga_hw_sampler_bindings,
+ &svga_hw_clip_planes,
+ &svga_hw_vdecl,
+ &svga_hw_fs_constants,
+ &svga_hw_gs_constants,
+ &svga_hw_tes_constants,
+ &svga_hw_tcs_constants,
+ &svga_hw_vs_constants,
+ NULL
+};
+
+
static const struct svga_tracked_state *swtnl_draw_state[] =
{
&svga_update_swtnl_draw,
@@ -89,6 +137,7 @@ static const struct svga_tracked_state *swtnl_draw_state[] =
NULL
};
+
/* Flattens the graph of state dependencies. Could swap the positions
* of hw_clear_state and need_swtnl_state without breaking anything.
*/
@@ -96,27 +145,26 @@ static const struct svga_tracked_state **state_levels[] =
{
need_swtnl_state,
hw_clear_state,
- hw_draw_state,
+ NULL, /* hw_draw_state, to be set to the right version */
swtnl_draw_state
};
-
-static unsigned
-check_state(unsigned a, unsigned b)
+static uint64_t
+check_state(uint64_t a, uint64_t b)
{
return (a & b);
}
static void
-accumulate_state(unsigned *a, unsigned b)
+accumulate_state(uint64_t *a, uint64_t b)
{
*a |= b;
}
static void
-xor_states(unsigned *result, unsigned a, unsigned b)
+xor_states(uint64_t *result, uint64_t a, uint64_t b)
{
*result = a ^ b;
}
@@ -125,7 +173,7 @@ xor_states(unsigned *result, unsigned a, unsigned b)
static enum pipe_error
update_state(struct svga_context *svga,
const struct svga_tracked_state *atoms[],
- unsigned *state)
+ uint64_t *state)
{
#ifdef DEBUG
boolean debug = TRUE;
@@ -144,13 +192,13 @@ update_state(struct svga_context *svga,
* state flags which are generated and checked to help ensure
* state atoms are ordered correctly in the list.
*/
- unsigned examined, prev;
+ uint64_t examined, prev;
examined = 0;
prev = *state;
for (i = 0; atoms[i] != NULL; i++) {
- unsigned generated;
+ uint64_t generated;
assert(atoms[i]->dirty);
assert(atoms[i]->update);
@@ -247,12 +295,7 @@ svga_update_state_retry(struct svga_context *svga, unsigned max_level)
{
enum pipe_error ret;
- ret = svga_update_state( svga, max_level );
-
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = svga_update_state( svga, max_level );
- }
+ SVGA_RETRY_OOM(svga, ret, svga_update_state( svga, max_level ));
return ret == PIPE_OK;
}
@@ -325,3 +368,14 @@ svga_emit_initial_state(struct svga_context *svga)
return PIPE_OK;
}
}
+
+
+void
+svga_init_tracked_state(struct svga_context *svga)
+{
+ /* Set the hw_draw_state atom list to the one for the particular GPU version.
+ */
+ state_levels[2] = svga_have_sm5(svga) ? hw_draw_state_sm5 :
+ (svga_have_vgpu10(svga) ? hw_draw_state_vgpu10 :
+ hw_draw_state_vgpu9);
+}
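svga_init_tracked_state() above picks one of three hw_draw atom lists depending on the device generation, and the dirty flags are widened to uint64_t now that there are more than 32 state bits. A small, self-contained sketch of the dirty-bit dispatch that update_state() performs; the atom list, flag value, and callback here are illustrative only:

#include <stdint.h>

typedef void (*update_fn)(uint64_t dirty);

struct tracked_state {
   const char *name;
   uint64_t dirty;               /* bits this atom depends on */
   update_fn update;
};

static void update_example(uint64_t dirty) { (void) dirty; }

static const struct tracked_state example_atoms[] = {
   { "example atom", UINT64_C(1) << 40, update_example },
   { NULL, 0, NULL }
};

/* Run every atom whose dependency mask intersects the dirty bits, then
 * clear the accumulated dirty state. */
static void
run_atoms(const struct tracked_state *atoms, uint64_t *dirty)
{
   for (unsigned i = 0; atoms[i].name; i++) {
      if (*dirty & atoms[i].dirty)
         atoms[i].update(*dirty);
   }
   *dirty = 0;
}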
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
index 963a27941ba..76befebe4a3 100644
--- a/src/gallium/drivers/svga/svga_state.h
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -39,8 +39,8 @@ void svga_destroy_state( struct svga_context *svga );
struct svga_tracked_state {
const char *name;
- unsigned dirty;
- enum pipe_error (*update)( struct svga_context *svga, unsigned dirty );
+ uint64_t dirty;
+ enum pipe_error (*update)( struct svga_context *svga, uint64_t dirty );
};
/* NEED_SWTNL
@@ -61,6 +61,8 @@ extern struct svga_tracked_state svga_need_tgsi_transform;
extern struct svga_tracked_state svga_hw_vs;
extern struct svga_tracked_state svga_hw_fs;
extern struct svga_tracked_state svga_hw_gs;
+extern struct svga_tracked_state svga_hw_tcs;
+extern struct svga_tracked_state svga_hw_tes;
extern struct svga_tracked_state svga_hw_rss;
extern struct svga_tracked_state svga_hw_pstipple;
extern struct svga_tracked_state svga_hw_sampler;
@@ -72,6 +74,8 @@ extern struct svga_tracked_state svga_hw_vdecl;
extern struct svga_tracked_state svga_hw_fs_constants;
extern struct svga_tracked_state svga_hw_gs_constants;
extern struct svga_tracked_state svga_hw_vs_constants;
+extern struct svga_tracked_state svga_hw_tes_constants;
+extern struct svga_tracked_state svga_hw_tcs_constants;
/* SWTNL_DRAW
*/
@@ -105,4 +109,15 @@ enum pipe_error svga_reemit_vs_bindings(struct svga_context *svga);
enum pipe_error svga_reemit_fs_bindings(struct svga_context *svga);
+void svga_init_tracked_state(struct svga_context *svga);
+
+void *
+svga_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ);
+
+void
+svga_bind_fs_state(struct pipe_context *pipe, void *shader);
+
+bool svga_update_compute_state(struct svga_context *svga);
+
#endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index e2c5bf0163a..9d9f8934ec4 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -133,12 +133,13 @@ svga_get_extra_fs_constants(const struct svga_context *svga, float *dest)
* will be returned in 'dest'.
*/
static unsigned
-svga_get_prescale_constants(const struct svga_context *svga, float **dest)
+svga_get_prescale_constants(const struct svga_context *svga, float **dest,
+ const struct svga_prescale *prescale)
{
- memcpy(*dest, svga->state.hw_clear.prescale.scale, 4 * sizeof(float));
+ memcpy(*dest, prescale->scale, 4 * sizeof(float));
*dest += 4;
- memcpy(*dest, svga->state.hw_clear.prescale.translate, 4 * sizeof(float));
+ memcpy(*dest, prescale->translate, 4 * sizeof(float));
*dest += 4;
return 2;
@@ -153,8 +154,8 @@ svga_get_pt_sprite_constants(const struct svga_context *svga, float **dest)
const struct svga_screen *screen = svga_screen(svga->pipe.screen);
float *dst = *dest;
- dst[0] = 1.0 / (svga->curr.viewport.scale[0] * 2);
- dst[1] = 1.0 / (svga->curr.viewport.scale[1] * 2);
+ dst[0] = 1.0 / (svga->curr.viewport[0].scale[0] * 2);
+ dst[1] = 1.0 / (svga->curr.viewport[0].scale[1] * 2);
dst[2] = svga->curr.rast->pointsize;
dst[3] = screen->maxPointSize;
*dest = *dest + 4;
@@ -186,6 +187,7 @@ svga_get_clip_plane_constants(const struct svga_context *svga,
return count;
}
+
/**
* Emit any extra vertex shader constants into the buffer pointed
* to by 'dest'.
@@ -203,15 +205,16 @@ svga_get_extra_vs_constants(const struct svga_context *svga, float *dest)
/* SVGA_NEW_VS_VARIANT
*/
if (variant->key.vs.need_prescale) {
- count += svga_get_prescale_constants(svga, &dest);
+ count += svga_get_prescale_constants(svga, &dest,
+ &svga->state.hw_clear.prescale[0]);
}
if (variant->key.vs.undo_viewport) {
/* Used to convert window coords back to NDC coords */
- dest[0] = 1.0f / svga->curr.viewport.scale[0];
- dest[1] = 1.0f / svga->curr.viewport.scale[1];
- dest[2] = -svga->curr.viewport.translate[0];
- dest[3] = -svga->curr.viewport.translate[1];
+ dest[0] = 1.0f / svga->curr.viewport[0].scale[0];
+ dest[1] = 1.0f / svga->curr.viewport[0].scale[1];
+ dest[2] = -svga->curr.viewport[0].translate[0];
+ dest[3] = -svga->curr.viewport[0].translate[1];
dest += 4;
count += 1;
}
@@ -250,7 +253,20 @@ svga_get_extra_gs_constants(const struct svga_context *svga, float *dest)
}
if (variant->key.gs.need_prescale) {
- count += svga_get_prescale_constants(svga, &dest);
+ unsigned i, num_prescale = 1;
+
+ /* If prescale is needed and the geometry shader writes to viewport
+ * index, then prescale for all viewports will be added to the
+ * constant buffer.
+ */
+ if (variant->key.gs.writes_viewport_index)
+ num_prescale = svga->state.hw_clear.num_prescale;
+
+ for (i = 0; i < num_prescale; i++) {
+ count +=
+ svga_get_prescale_constants(svga, &dest,
+ &svga->state.hw_clear.prescale[i]);
+ }
}
/* SVGA_NEW_CLIP */
@@ -265,6 +281,77 @@ svga_get_extra_gs_constants(const struct svga_context *svga, float *dest)
}
+/**
+ * Emit any extra tessellation control shader constants into the
+ * buffer pointed to by 'dest'.
+ */
+static unsigned
+svga_get_extra_tcs_constants(struct svga_context *svga, float *dest)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.tcs;
+ unsigned count = 0;
+
+ /* SVGA_NEW_CLIP */
+ count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+ /* common constants */
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_TESS_CTRL,
+ dest);
+
+ assert(count <= MAX_EXTRA_CONSTS);
+ return count;
+}
+
+
+/**
+ * Emit any extra tessellation evaluation shader constants into
+ * the buffer pointed to by 'dest'.
+ */
+static unsigned
+svga_get_extra_tes_constants(struct svga_context *svga, float *dest)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.tes;
+ unsigned count = 0;
+
+ if (variant->key.tes.need_prescale) {
+ count += svga_get_prescale_constants(svga, &dest,
+ &svga->state.hw_clear.prescale[0]);
+ }
+
+ /* SVGA_NEW_CLIP */
+ count += svga_get_clip_plane_constants(svga, variant, &dest);
+
+ /* common constants */
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_TESS_EVAL,
+ dest);
+
+ assert(count <= MAX_EXTRA_CONSTS);
+ return count;
+}
+
+
+/**
+ * Emit any extra compute shader constants into
+ * the buffer pointed to by 'dest'.
+ */
+static unsigned
+svga_get_extra_cs_constants(struct svga_context *svga, float *dest)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.cs;
+ unsigned count = 0;
+
+ /* common constants */
+ count += svga_get_extra_constants_common(svga, variant,
+ PIPE_SHADER_COMPUTE,
+ dest);
+
+ assert(count <= MAX_EXTRA_CONSTS);
+ return count;
+}
+
+
/*
* Check and emit a range of shader constant registers, trying to coalesce
* successive shader constant updates in a single command in order to save
@@ -490,6 +577,15 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader)
const struct svga_shader_variant *variant;
unsigned alloc_buf_size;
+ assert(shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_GEOMETRY ||
+ shader == PIPE_SHADER_FRAGMENT ||
+ shader == PIPE_SHADER_TESS_CTRL ||
+ shader == PIPE_SHADER_TESS_EVAL ||
+ shader == PIPE_SHADER_COMPUTE);
+
+ cbuf = &svga->curr.constbufs[shader][0];
+
switch (shader) {
case PIPE_SHADER_VERTEX:
variant = svga->state.hw_draw.vs;
@@ -503,6 +599,18 @@ emit_constbuf_vgpu10(struct svga_context *svga, enum pipe_shader_type shader)
variant = svga->state.hw_draw.gs;
extra_count = svga_get_extra_gs_constants(svga, (float *) extras);
break;
+ case PIPE_SHADER_TESS_CTRL:
+ variant = svga->state.hw_draw.tcs;
+ extra_count = svga_get_extra_tcs_constants(svga, (float *) extras);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ variant = svga->state.hw_draw.tes;
+ extra_count = svga_get_extra_tes_constants(svga, (float *) extras);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ variant = svga->state.hw_draw.cs;
+ extra_count = svga_get_extra_cs_constants(svga, (float *) extras);
+ break;
default:
assert(!"Unexpected shader type");
/* Don't return an error code since we don't want to keep re-trying
@@ -706,7 +814,7 @@ emit_consts_vgpu10(struct svga_context *svga, enum pipe_shader_type shader)
}
static enum pipe_error
-emit_fs_consts(struct svga_context *svga, unsigned dirty)
+emit_fs_consts(struct svga_context *svga, uint64_t dirty)
{
const struct svga_shader_variant *variant = svga->state.hw_draw.fs;
enum pipe_error ret = PIPE_OK;
@@ -741,7 +849,7 @@ struct svga_tracked_state svga_hw_fs_constants =
static enum pipe_error
-emit_vs_consts(struct svga_context *svga, unsigned dirty)
+emit_vs_consts(struct svga_context *svga, uint64_t dirty)
{
const struct svga_shader_variant *variant = svga->state.hw_draw.vs;
enum pipe_error ret = PIPE_OK;
@@ -776,7 +884,7 @@ struct svga_tracked_state svga_hw_vs_constants =
static enum pipe_error
-emit_gs_consts(struct svga_context *svga, unsigned dirty)
+emit_gs_consts(struct svga_context *svga, uint64_t dirty)
{
const struct svga_shader_variant *variant = svga->state.hw_draw.gs;
enum pipe_error ret = PIPE_OK;
@@ -788,17 +896,17 @@ emit_gs_consts(struct svga_context *svga, unsigned dirty)
/* SVGA_NEW_GS_CONST_BUFFER
*/
- if (svga_have_vgpu10(svga)) {
- /**
- * If only the rasterizer state has changed and the current geometry
- * shader does not emit wide points, then there is no reason to
- * re-emit the GS constants, so skip it.
- */
- if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point)
- return PIPE_OK;
+ assert(svga_have_vgpu10(svga));
- ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
- }
+ /**
+ * If only the rasterizer state has changed and the current geometry
+ * shader does not emit wide points, then there is no reason to
+ * re-emit the GS constants, so skip it.
+ */
+ if (dirty == SVGA_NEW_RAST && !variant->key.gs.wide_point)
+ return PIPE_OK;
+
+ ret = emit_consts_vgpu10(svga, PIPE_SHADER_GEOMETRY);
return ret;
}
@@ -814,3 +922,66 @@ struct svga_tracked_state svga_hw_gs_constants =
SVGA_NEW_TEXTURE_CONSTS),
emit_gs_consts
};
+
+
+/**
+ * Emit constant buffer for tessellation control shader
+ */
+static enum pipe_error
+emit_tcs_consts(struct svga_context *svga, uint64_t dirty)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.tcs;
+ enum pipe_error ret = PIPE_OK;
+
+ assert(svga_have_sm5(svga));
+
+ /* SVGA_NEW_TCS_VARIANT */
+ if (!variant)
+ return PIPE_OK;
+
+ /* SVGA_NEW_TCS_CONST_BUFFER */
+
+ ret = emit_consts_vgpu10(svga, PIPE_SHADER_TESS_CTRL);
+
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_tcs_constants =
+{
+ "hw tcs params",
+ (SVGA_NEW_TCS_CONST_BUFFER |
+ SVGA_NEW_TCS_VARIANT),
+ emit_tcs_consts
+};
+
+
+/**
+ * Emit constant buffer for tessellation evaluation shader
+ */
+static enum pipe_error
+emit_tes_consts(struct svga_context *svga, uint64_t dirty)
+{
+ const struct svga_shader_variant *variant = svga->state.hw_draw.tes;
+ enum pipe_error ret = PIPE_OK;
+
+ assert(svga_have_sm5(svga));
+
+ /* SVGA_NEW_TES_VARIANT */
+ if (!variant)
+ return PIPE_OK;
+
+ ret = emit_consts_vgpu10(svga, PIPE_SHADER_TESS_EVAL);
+
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_tes_constants =
+{
+ "hw tes params",
+ (SVGA_NEW_PRESCALE |
+ SVGA_NEW_TES_CONST_BUFFER |
+ SVGA_NEW_TES_VARIANT),
+ emit_tes_consts
+};
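The prescale-constant helper reworked above copies one viewport's scale and translate vectors into the constant buffer and reports how many vec4 slots it used, so the GS path can emit one set per viewport. A reduced sketch of that helper with a stand-in prescale type (not the driver's struct):

#include <string.h>

struct prescale {
   float scale[4];
   float translate[4];
};

/* Append one prescale (scale then translate) to the constant buffer and
 * return the number of vec4 constants written. */
static unsigned
get_prescale_constants(float **dest, const struct prescale *p)
{
   memcpy(*dest, p->scale, 4 * sizeof(float));
   *dest += 4;
   memcpy(*dest, p->translate, 4 * sizeof(float));
   *dest += 4;
   return 2;
}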
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index 99fede51254..dacf86c4277 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -212,9 +212,13 @@ emit_fb_vgpu10(struct svga_context *svga)
if (curr->cbufs[i]) {
struct pipe_surface *s = curr->cbufs[i];
- rtv[i] = svga_validate_surface_view(svga, svga_surface(s));
- if (rtv[i] == NULL) {
- return PIPE_ERROR_OUT_OF_MEMORY;
+ if (curr->cbufs[i] != hw->cbufs[i]) {
+ rtv[i] = svga_validate_surface_view(svga, svga_surface(s));
+ if (rtv[i] == NULL) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ } else {
+ rtv[i] = svga->state.hw_clear.rtv[i];
}
assert(svga_surface(rtv[i])->view_id != SVGA3D_INVALID_ID);
@@ -233,9 +237,13 @@ emit_fb_vgpu10(struct svga_context *svga)
if (curr->zsbuf) {
struct pipe_surface *s = curr->zsbuf;
- dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
- if (!dsv) {
- return PIPE_ERROR_OUT_OF_MEMORY;
+ if (curr->zsbuf != hw->zsbuf) {
+ dsv = svga_validate_surface_view(svga, svga_surface(curr->zsbuf));
+ if (!dsv) {
+ return PIPE_ERROR_OUT_OF_MEMORY;
+ }
+ } else {
+ dsv = svga->state.hw_clear.dsv;
}
/* Set the rendered-to flag */
@@ -258,10 +266,6 @@ emit_fb_vgpu10(struct svga_context *svga)
/* number of render targets sent to the device, not including trailing
* unbound render targets.
*/
- svga->state.hw_clear.num_rendertargets = last_rtv + 1;
- svga->state.hw_clear.dsv = dsv;
- memcpy(svga->state.hw_clear.rtv, rtv, num_color * sizeof(rtv[0]));
-
for (i = 0; i < ss->max_color_buffers; i++) {
if (hw->cbufs[i] != curr->cbufs[i]) {
/* propagate the backed view surface before unbinding it */
@@ -270,19 +274,32 @@ emit_fb_vgpu10(struct svga_context *svga)
&svga_surface(hw->cbufs[i])->backed->base,
TRUE);
}
+ else if (svga->state.hw_clear.rtv[i] != hw->cbufs[i] &&
+ svga->state.hw_clear.rtv[i]) {
+ /* Free the alternate surface view when it is unbound. */
+ svga->pipe.surface_destroy(&svga->pipe, svga->state.hw_clear.rtv[i]);
+ }
pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
}
}
+ svga->state.hw_clear.num_rendertargets = last_rtv + 1;
+ memcpy(svga->state.hw_clear.rtv, rtv, num_color * sizeof(rtv[0]));
hw->nr_cbufs = curr->nr_cbufs;
if (hw->zsbuf != curr->zsbuf) {
/* propagate the backed view surface before unbinding it */
if (hw->zsbuf && svga_surface(hw->zsbuf)->backed) {
- svga_propagate_surface(svga, &svga_surface(hw->zsbuf)->backed->base,
+ svga_propagate_surface(svga,
+ &svga_surface(hw->zsbuf)->backed->base,
TRUE);
}
+ else if (svga->state.hw_clear.dsv != hw->zsbuf && svga->state.hw_clear.dsv) {
+ /* Free the alternate surface view when it is unbound. */
+ svga->pipe.surface_destroy(&svga->pipe, svga->state.hw_clear.dsv);
+ }
pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
}
+ svga->state.hw_clear.dsv = dsv;
}
return ret;
@@ -290,7 +307,7 @@ emit_fb_vgpu10(struct svga_context *svga)
static enum pipe_error
-emit_framebuffer(struct svga_context *svga, unsigned dirty)
+emit_framebuffer(struct svga_context *svga, uint64_t dirty)
{
if (svga_have_vgpu10(svga)) {
return emit_fb_vgpu10(svga);
@@ -383,13 +400,14 @@ struct svga_tracked_state svga_hw_framebuffer =
/***********************************************************************
*/
-static enum pipe_error
-emit_viewport( struct svga_context *svga,
- unsigned dirty )
+static void
+get_viewport_prescale(struct svga_context *svga,
+ struct pipe_viewport_state *viewport,
+ SVGA3dViewport *vp,
+ struct svga_prescale *prescale)
{
- const struct pipe_viewport_state *viewport = &svga->curr.viewport;
- struct svga_prescale prescale;
SVGA3dRect rect;
+
/* Not sure if this state is relevant with POSITIONT. Probably
* not, but setting to 0,1 avoids some state pingponging.
*/
@@ -398,7 +416,6 @@ emit_viewport( struct svga_context *svga,
float flip = -1.0;
boolean degenerate = FALSE;
boolean invertY = FALSE;
- enum pipe_error ret;
float fb_width = (float) svga->curr.framebuffer.width;
float fb_height = (float) svga->curr.framebuffer.height;
@@ -407,9 +424,8 @@ emit_viewport( struct svga_context *svga,
float fy = flip * viewport->scale[1] * -1.0f + viewport->translate[1];
float fw = viewport->scale[0] * 2.0f;
float fh = flip * viewport->scale[1] * 2.0f;
- boolean emit_vgpu10_viewport = FALSE;
- memset( &prescale, 0, sizeof(prescale) );
+ memset(prescale, 0, sizeof(*prescale));
/* Examine gallium viewport transformation and produce a screen
* rectangle and possibly vertex shader pre-transformation to
@@ -423,14 +439,14 @@ emit_viewport( struct svga_context *svga,
fw,
fh);
- prescale.scale[0] = 1.0;
- prescale.scale[1] = 1.0;
- prescale.scale[2] = 1.0;
- prescale.scale[3] = 1.0;
- prescale.translate[0] = 0;
- prescale.translate[1] = 0;
- prescale.translate[2] = 0;
- prescale.translate[3] = 0;
+ prescale->scale[0] = 1.0;
+ prescale->scale[1] = 1.0;
+ prescale->scale[2] = 1.0;
+ prescale->scale[3] = 1.0;
+ prescale->translate[0] = 0;
+ prescale->translate[1] = 0;
+ prescale->translate[2] = 0;
+ prescale->translate[3] = 0;
/* Enable prescale to adjust vertex positions to match
VGPU10 convention only if rasterization is enabled.
@@ -439,12 +455,12 @@ emit_viewport( struct svga_context *svga,
degenerate = TRUE;
goto out;
} else {
- prescale.enabled = TRUE;
+ prescale->enabled = TRUE;
}
if (fw < 0) {
- prescale.scale[0] *= -1.0f;
- prescale.translate[0] += -fw;
+ prescale->scale[0] *= -1.0f;
+ prescale->translate[0] += -fw;
fw = -fw;
fx = viewport->scale[0] * 1.0f + viewport->translate[0];
}
@@ -452,54 +468,54 @@ emit_viewport( struct svga_context *svga,
if (fh < 0.0) {
if (svga_have_vgpu10(svga)) {
/* floating point viewport params below */
- prescale.translate[1] = fh + fy * 2.0f;
+ prescale->translate[1] = fh + fy * 2.0f;
}
else {
/* integer viewport params below */
- prescale.translate[1] = fh - 1.0f + fy * 2.0f;
+ prescale->translate[1] = fh - 1.0f + fy * 2.0f;
}
fh = -fh;
fy -= fh;
- prescale.scale[1] = -1.0f;
+ prescale->scale[1] = -1.0f;
invertY = TRUE;
}
if (fx < 0) {
- prescale.translate[0] += fx;
- prescale.scale[0] *= fw / (fw + fx);
+ prescale->translate[0] += fx;
+ prescale->scale[0] *= fw / (fw + fx);
fw += fx;
fx = 0.0f;
}
if (fy < 0) {
if (invertY) {
- prescale.translate[1] -= fy;
+ prescale->translate[1] -= fy;
}
else {
- prescale.translate[1] += fy;
+ prescale->translate[1] += fy;
}
- prescale.scale[1] *= fh / (fh + fy);
+ prescale->scale[1] *= fh / (fh + fy);
fh += fy;
fy = 0.0f;
}
if (fx + fw > fb_width) {
- prescale.scale[0] *= fw / (fb_width - fx);
- prescale.translate[0] -= fx * (fw / (fb_width - fx));
- prescale.translate[0] += fx;
+ prescale->scale[0] *= fw / (fb_width - fx);
+ prescale->translate[0] -= fx * (fw / (fb_width - fx));
+ prescale->translate[0] += fx;
fw = fb_width - fx;
}
if (fy + fh > fb_height) {
- prescale.scale[1] *= fh / (fb_height - fy);
+ prescale->scale[1] *= fh / (fb_height - fy);
if (invertY) {
float in = fb_height - fy; /* number of vp pixels inside view */
float out = fy + fh - fb_height; /* number of vp pixels out of view */
- prescale.translate[1] += fy * out / in;
+ prescale->translate[1] += fy * out / in;
}
else {
- prescale.translate[1] -= fy * (fh / (fb_height - fy));
- prescale.translate[1] += fy;
+ prescale->translate[1] -= fy * (fh / (fb_height - fy));
+ prescale->translate[1] += fy;
}
fh = fb_height - fy;
}
@@ -566,10 +582,10 @@ emit_viewport( struct svga_context *svga,
if (invertY)
adjust_y = -adjust_y;
- prescale.translate[0] += adjust_x;
- prescale.translate[1] += adjust_y;
- prescale.translate[2] = 0.5; /* D3D clip space */
- prescale.scale[2] = 0.5; /* D3D clip space */
+ prescale->translate[0] += adjust_x;
+ prescale->translate[1] += adjust_y;
+ prescale->translate[2] = 0.5; /* D3D clip space */
+ prescale->scale[2] = 0.5; /* D3D clip space */
}
range_min = viewport->scale[2] * -1.0f + viewport->translate[2];
@@ -584,7 +600,7 @@ emit_viewport( struct svga_context *svga,
range_tmp = range_min;
range_min = range_max;
range_max = range_tmp;
- prescale.scale[2] = -prescale.scale[2];
+ prescale->scale[2] = -prescale->scale[2];
}
/* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
@@ -594,21 +610,21 @@ emit_viewport( struct svga_context *svga,
if (range_min < 0.0f) {
range_min = -0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
range_max = 0.5f * viewport->scale[2] + 0.5f + viewport->translate[2];
- prescale.scale[2] *= 2.0f;
- prescale.translate[2] -= 0.5f;
+ prescale->scale[2] *= 2.0f;
+ prescale->translate[2] -= 0.5f;
}
- if (prescale.enabled) {
+ if (prescale->enabled) {
float H[2];
float J[2];
int i;
SVGA_DBG(DEBUG_VIEWPORT,
"prescale %f,%f %fx%f\n",
- prescale.translate[0],
- prescale.translate[1],
- prescale.scale[0],
- prescale.scale[1]);
+ prescale->translate[0],
+ prescale->translate[1],
+ prescale->scale[0],
+ prescale->scale[1]);
H[0] = (float)rect.w / 2.0f;
H[1] = -(float)rect.h / 2.0f;
@@ -645,16 +661,16 @@ emit_viewport( struct svga_context *svga,
* Overwrite prescale.translate with values for K:
*/
for (i = 0; i < 2; i++) {
- prescale.translate[i] = ((prescale.translate[i] +
- (prescale.scale[i] - 1.0f) * J[i]) / H[i]);
+ prescale->translate[i] = ((prescale->translate[i] +
+ (prescale->scale[i] - 1.0f) * J[i]) / H[i]);
}
SVGA_DBG(DEBUG_VIEWPORT,
"clipspace %f,%f %fx%f\n",
- prescale.translate[0],
- prescale.translate[1],
- prescale.scale[0],
- prescale.scale[1]);
+ prescale->translate[0],
+ prescale->translate[1],
+ prescale->scale[0],
+ prescale->scale[1]);
}
out:
@@ -663,59 +679,90 @@ out:
rect.y = 0;
rect.w = 1;
rect.h = 1;
- prescale.enabled = FALSE;
+ prescale->enabled = FALSE;
}
- if (!svga_rects_equal(&rect, &svga->state.hw_clear.viewport)) {
- if (svga_have_vgpu10(svga)) {
- emit_vgpu10_viewport = TRUE;
- }
- else {
+ vp->x = (float) rect.x;
+ vp->y = (float) rect.y;
+ vp->width = (float) rect.w;
+ vp->height = (float) rect.h;
+ vp->minDepth = range_min;
+ vp->maxDepth = range_max;
+}
+
+
+static enum pipe_error
+emit_viewport( struct svga_context *svga,
+ uint64_t dirty )
+{
+ struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
+ SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS];
+ struct svga_prescale prescale[SVGA3D_DX_MAX_VIEWPORTS];
+ unsigned i;
+ enum pipe_error ret;
+ unsigned max_viewports = svgascreen->max_viewports;
+
+ for (i = 0; i < max_viewports; i++) {
+ get_viewport_prescale(svga, &svga->curr.viewport[i],
+ &viewports[i], &prescale[i]);
+ }
+
+ if (memcmp(viewports, svga->state.hw_clear.viewports,
+ max_viewports * sizeof viewports[0]) != 0) {
+
+ if (!svga_have_vgpu10(svga)) {
+ SVGA3dRect rect;
+ SVGA3dViewport *vp = &viewports[0];
+
+ rect.x = (uint32)vp->x;
+ rect.y = (uint32)vp->y;
+ rect.w = (uint32)vp->width;
+ rect.h = (uint32)vp->height;
+
ret = SVGA3D_SetViewport(svga->swc, &rect);
if (ret != PIPE_OK)
return ret;
- svga->state.hw_clear.viewport = rect;
- }
- }
+ ret = SVGA3D_SetZRange(svga->swc, vp->minDepth, vp->maxDepth);
+ if (ret != PIPE_OK)
+ return ret;
- if (svga->state.hw_clear.depthrange.zmin != range_min ||
- svga->state.hw_clear.depthrange.zmax != range_max)
- {
- if (svga_have_vgpu10(svga)) {
- emit_vgpu10_viewport = TRUE;
+ svga->state.hw_clear.viewport = rect;
+ svga->state.hw_clear.depthrange.zmin = vp->minDepth;
+ svga->state.hw_clear.depthrange.zmax = vp->maxDepth;
}
else {
- ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+ ret = SVGA3D_vgpu10_SetViewports(svga->swc, max_viewports,
+ viewports);
if (ret != PIPE_OK)
return ret;
-
- svga->state.hw_clear.depthrange.zmin = range_min;
- svga->state.hw_clear.depthrange.zmax = range_max;
}
+ memcpy(svga->state.hw_clear.viewports, viewports,
+ max_viewports * sizeof viewports[0]);
}
- if (emit_vgpu10_viewport) {
- SVGA3dViewport vp;
- vp.x = (float) rect.x;
- vp.y = (float) rect.y;
- vp.width = (float) rect.w;
- vp.height = (float) rect.h;
- vp.minDepth = range_min;
- vp.maxDepth = range_max;
- ret = SVGA3D_vgpu10_SetViewports(svga->swc, 1, &vp);
- if (ret != PIPE_OK)
- return ret;
-
- svga->state.hw_clear.viewport = rect;
-
- svga->state.hw_clear.depthrange.zmin = range_min;
- svga->state.hw_clear.depthrange.zmax = range_max;
- }
-
- if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) {
+ if (memcmp(prescale, svga->state.hw_clear.prescale,
+ max_viewports * sizeof prescale[0]) != 0) {
svga->dirty |= SVGA_NEW_PRESCALE;
- svga->state.hw_clear.prescale = prescale;
+ memcpy(svga->state.hw_clear.prescale, prescale,
+ max_viewports * sizeof prescale[0]);
+
+ /*
+       * Determine the number of unique prescales. This minimizes the
+       * number of checks needed in the geometry shader to identify the
+       * prescale for the specified viewport.
+ */
+ unsigned last_prescale = SVGA3D_DX_MAX_VIEWPORTS - 1;
+ unsigned i;
+ for (i = SVGA3D_DX_MAX_VIEWPORTS-1; i > 0; i--) {
+ if (memcmp(&svga->state.hw_clear.prescale[i],
+ &svga->state.hw_clear.prescale[i-1],
+ sizeof svga->state.hw_clear.prescale[0])) {
+ break;
+ }
+ last_prescale--;
+ }
+ svga->state.hw_clear.num_prescale = last_prescale + 1;
}
return PIPE_OK;
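
For reference, the unique-prescale count computed above collapses identical trailing entries so the geometry shader only needs to distinguish prescales that actually differ. A minimal standalone sketch of the same backwards adjacent-compare (hypothetical helper and element type, not part of this change):

/* Illustration of the num_prescale computation: walk backwards and stop
 * at the first adjacent pair that differs, e.g. {A, B, B, B} -> 2 and
 * {A, A, A, A} -> 1.  Uses a plain array element so memcmp() is a valid
 * comparison, mirroring the struct compare in the driver code above.
 */
#include <string.h>

static unsigned
count_unique_prescales(const float (*prescale)[8], unsigned n)
{
   unsigned last = n - 1;
   unsigned i;
   for (i = n - 1; i > 0; i--) {
      if (memcmp(&prescale[i], &prescale[i - 1], sizeof prescale[0]) != 0)
         break;      /* entries i-1 and i differ */
      last--;        /* entry i duplicates its predecessor */
   }
   return last + 1;
}
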
@@ -738,32 +785,49 @@ struct svga_tracked_state svga_hw_viewport =
*/
static enum pipe_error
emit_scissor_rect( struct svga_context *svga,
- unsigned dirty )
+ uint64_t dirty )
{
- const struct pipe_scissor_state *scissor = &svga->curr.scissor;
+ struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
+ const struct pipe_scissor_state *scissor = svga->curr.scissor;
+ unsigned max_viewports = svgascreen->max_viewports;
+ enum pipe_error ret;
- if (svga_have_vgpu10(svga)) {
- SVGASignedRect rect;
+ if (memcmp(&svga->state.hw_clear.scissors[0], scissor,
+ max_viewports * sizeof *scissor) != 0) {
- rect.left = scissor->minx;
- rect.top = scissor->miny;
- rect.right = scissor->maxx;
- rect.bottom = scissor->maxy;
+ if (svga_have_vgpu10(svga)) {
+ SVGASignedRect rect[SVGA3D_DX_MAX_VIEWPORTS];
+ unsigned i;
+
+ for (i = 0; i < max_viewports; i++) {
+ rect[i].left = scissor[i].minx;
+ rect[i].top = scissor[i].miny;
+ rect[i].right = scissor[i].maxx;
+ rect[i].bottom = scissor[i].maxy;
+ }
- return SVGA3D_vgpu10_SetScissorRects(svga->swc, 1, &rect);
- }
- else {
- SVGA3dRect rect;
+ ret = SVGA3D_vgpu10_SetScissorRects(svga->swc, max_viewports, rect);
+ }
+ else {
+ SVGA3dRect rect;
- rect.x = scissor->minx;
- rect.y = scissor->miny;
- rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
- rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+ rect.x = scissor[0].minx;
+ rect.y = scissor[0].miny;
+ rect.w = scissor[0].maxx - scissor[0].minx; /* + 1 ?? */
+ rect.h = scissor[0].maxy - scissor[0].miny; /* + 1 ?? */
- return SVGA3D_SetScissorRect(svga->swc, &rect);
+ ret = SVGA3D_SetScissorRect(svga->swc, &rect);
+ }
+
+ if (ret != PIPE_OK)
+ return ret;
+
+ memcpy(svga->state.hw_clear.scissors, scissor,
+ max_viewports * sizeof *scissor);
}
-}
+ return PIPE_OK;
+}
struct svga_tracked_state svga_hw_scissor =
{
@@ -779,7 +843,7 @@ struct svga_tracked_state svga_hw_scissor =
static enum pipe_error
emit_clip_planes( struct svga_context *svga,
- unsigned dirty )
+ uint64_t dirty )
{
unsigned i;
enum pipe_error ret;
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index d55a799d435..675fec96cf8 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -196,8 +196,10 @@ make_fs_key(const struct svga_context *svga,
*/
if (svga->curr.gs) {
key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
+ key->fs.layer_to_zero = !svga->curr.gs->base.info.writes_layer;
} else {
key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
+ key->fs.layer_to_zero = 1;
}
/* Only need fragment shader fixup for twoside lighting if doing
@@ -276,7 +278,7 @@ make_fs_key(const struct svga_context *svga,
*
* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
*/
- svga_init_shader_key_common(svga, shader, key);
+ svga_init_shader_key_common(svga, shader, &fs->base, key);
for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
@@ -317,15 +319,6 @@ make_fs_key(const struct svga_context *svga,
debug_warn_once("Unsupported shadow compare function");
}
}
- else {
- /* For other texture formats, just use the compare func/mode
- * as-is. Should be no-ops for color textures. For depth
- * textures, we do not get automatic depth compare. We have
- * to do it ourselves in the shader. And we don't get PCF.
- */
- key->tex[i].compare_mode = sampler->compare_mode;
- key->tex[i].compare_func = sampler->compare_func;
- }
}
}
}
@@ -401,22 +394,26 @@ svga_reemit_fs_bindings(struct svga_context *svga)
static enum pipe_error
-emit_hw_fs(struct svga_context *svga, unsigned dirty)
+emit_hw_fs(struct svga_context *svga, uint64_t dirty)
{
struct svga_shader_variant *variant = NULL;
enum pipe_error ret = PIPE_OK;
struct svga_fragment_shader *fs = svga->curr.fs;
struct svga_compile_key key;
+ struct svga_shader *prevShader = NULL; /* shader in the previous stage */
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITFS);
+ prevShader = svga->curr.gs ?
+ &svga->curr.gs->base : (svga->curr.tes ?
+ &svga->curr.tes->base : &svga->curr.vs->base);
+
/* Disable rasterization if rasterizer_discard flag is set or
* vs/gs does not output position.
*/
svga->disable_rasterizer =
svga->curr.rast->templ.rasterizer_discard ||
- (svga->curr.gs && !svga->curr.gs->base.info.writes_position) ||
- (!svga->curr.gs && !svga->curr.vs->base.info.writes_position);
+ !prevShader->info.writes_position;
/* Set FS to NULL when rasterization is to be disabled */
if (svga->disable_rasterizer) {
diff --git a/src/gallium/drivers/svga/svga_state_gs.c b/src/gallium/drivers/svga/svga_state_gs.c
index 1eb4cebc08d..670b757c45f 100644
--- a/src/gallium/drivers/svga/svga_state_gs.c
+++ b/src/gallium/drivers/svga/svga_state_gs.c
@@ -109,34 +109,45 @@ make_gs_key(struct svga_context *svga, struct svga_compile_key *key)
/*
* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
*/
- svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, key);
+ svga_init_shader_key_common(svga, PIPE_SHADER_GEOMETRY, &gs->base, key);
memcpy(key->generic_remap_table, gs->generic_remap_table,
sizeof(gs->generic_remap_table));
key->gs.vs_generic_outputs = svga->curr.vs->generic_outputs;
- key->gs.need_prescale = svga->state.hw_clear.prescale.enabled;
+ key->gs.need_prescale = svga->state.hw_clear.prescale[0].enabled;
key->gs.writes_psize = gs->base.info.writes_psize;
key->gs.wide_point = gs->wide_point;
+ key->gs.writes_viewport_index = gs->base.info.writes_viewport_index;
+ if (key->gs.writes_viewport_index) {
+ key->gs.num_prescale = svga->state.hw_clear.num_prescale;
+ } else {
+ key->gs.num_prescale = 1;
+ }
key->sprite_coord_enable = svga->curr.rast->templ.sprite_coord_enable;
key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
== PIPE_SPRITE_COORD_LOWER_LEFT);
/* SVGA_NEW_RAST */
key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+
+ /* Mark this as the last shader in the vertex processing stage */
+ key->last_vertex_stage = 1;
}
static enum pipe_error
-emit_hw_gs(struct svga_context *svga, unsigned dirty)
+emit_hw_gs(struct svga_context *svga, uint64_t dirty)
{
struct svga_shader_variant *variant;
struct svga_geometry_shader *gs = svga->curr.gs;
enum pipe_error ret = PIPE_OK;
struct svga_compile_key key;
+ assert(svga_have_vgpu10(svga));
+
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITGS);
/* If there's a user-defined GS, we should have a pointer to a derived
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index f9cea143ac9..5a52c25a4c1 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -33,7 +33,7 @@
static enum pipe_error
-update_need_swvfetch(struct svga_context *svga, unsigned dirty)
+update_need_swvfetch(struct svga_context *svga, uint64_t dirty)
{
if (!svga->curr.velems) {
/* No vertex elements bound. */
@@ -58,7 +58,7 @@ struct svga_tracked_state svga_update_need_swvfetch =
static enum pipe_error
-update_need_pipeline(struct svga_context *svga, unsigned dirty)
+update_need_pipeline(struct svga_context *svga, uint64_t dirty)
{
boolean need_pipeline = FALSE;
struct svga_vertex_shader *vs = svga->curr.vs;
@@ -156,7 +156,7 @@ struct svga_tracked_state svga_update_need_pipeline =
static enum pipe_error
-update_need_swtnl(struct svga_context *svga, unsigned dirty)
+update_need_swtnl(struct svga_context *svga, uint64_t dirty)
{
boolean need_swtnl;
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index 3c42b4e8595..3549ce2938d 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -97,7 +97,7 @@ translate_fill_mode(unsigned fill)
* the "to" state.
*/
static enum pipe_error
-emit_rss_vgpu9(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu9(struct svga_context *svga, uint64_t dirty)
{
struct svga_screen *screen = svga_screen(svga->pipe.screen);
struct rs_queue queue;
@@ -363,7 +363,7 @@ get_no_depth_stencil_test_state(struct svga_context *svga)
static enum pipe_error
-emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
+emit_rss_vgpu10(struct svga_context *svga, uint64_t dirty)
{
enum pipe_error ret = PIPE_OK;
@@ -487,7 +487,7 @@ emit_rss_vgpu10(struct svga_context *svga, unsigned dirty)
static enum pipe_error
-emit_rss(struct svga_context *svga, unsigned dirty)
+emit_rss(struct svga_context *svga, uint64_t dirty)
{
if (svga_have_vgpu10(svga)) {
return emit_rss_vgpu10(svga, dirty);
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index 306c55dbb11..bbfd889e9f4 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -131,7 +131,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
if (sv->id == SVGA3D_INVALID_ID) {
struct svga_screen *ss = svga_screen(svga->pipe.screen);
struct pipe_resource *texture = sv->base.texture;
- struct svga_winsys_surface *surface = svga_resource_handle(texture);
+ struct svga_winsys_surface *surface;
SVGA3dSurfaceFormat format;
SVGA3dResourceType resourceDim;
SVGA3dShaderResourceViewDesc viewDesc;
@@ -154,6 +154,7 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
svga_translate_texture_buffer_view_format(viewFormat,
&format,
&pf_flags);
+ surface = svga_buffer_handle(svga, texture, PIPE_BIND_SAMPLER_VIEW);
}
else {
format = svga_translate_format(ss, viewFormat,
@@ -161,6 +162,8 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
/* Convert the format to a sampler-friendly format, if needed */
format = svga_sampler_format(format);
+
+ surface = svga_texture(texture)->handle;
}
assert(format != SVGA3D_FORMAT_INVALID);
@@ -234,15 +237,14 @@ svga_validate_pipe_sampler_view(struct svga_context *svga,
static enum pipe_error
-update_sampler_resources(struct svga_context *svga, unsigned dirty)
+update_sampler_resources(struct svga_context *svga, uint64_t dirty)
{
enum pipe_error ret = PIPE_OK;
enum pipe_shader_type shader;
- if (!svga_have_vgpu10(svga))
- return PIPE_OK;
+ assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) {
SVGA3dShaderResourceViewId ids[PIPE_MAX_SAMPLERS];
struct svga_winsys_surface *surfaces[PIPE_MAX_SAMPLERS];
struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
@@ -349,7 +351,8 @@ update_sampler_resources(struct svga_context *svga, unsigned dirty)
/* Handle polygon stipple sampler view */
if (svga->curr.rast->templ.poly_stipple_enable) {
- const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ const unsigned unit =
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
struct svga_pipe_sampler_view *sv = svga->polygon_stipple.sampler_view;
struct svga_winsys_surface *surface;
@@ -385,15 +388,14 @@ struct svga_tracked_state svga_hw_sampler_bindings = {
static enum pipe_error
-update_samplers(struct svga_context *svga, unsigned dirty )
+update_samplers(struct svga_context *svga, uint64_t dirty )
{
enum pipe_error ret = PIPE_OK;
enum pipe_shader_type shader;
- if (!svga_have_vgpu10(svga))
- return PIPE_OK;
+ assert(svga_have_vgpu10(svga));
- for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_GEOMETRY; shader++) {
+ for (shader = PIPE_SHADER_VERTEX; shader <= PIPE_SHADER_TESS_EVAL; shader++) {
const unsigned count = svga->curr.num_samplers[shader];
SVGA3dSamplerId ids[PIPE_MAX_SAMPLERS];
unsigned i;
@@ -404,7 +406,8 @@ update_samplers(struct svga_context *svga, unsigned dirty )
/* _NEW_FS */
if (shader == PIPE_SHADER_FRAGMENT) {
- struct svga_shader_variant *fs = svga->state.hw_draw.fs;
+ struct svga_fs_variant *fs =
+ svga_fs_variant(svga->state.hw_draw.fs);
/* If the fragment shader is doing the shadow comparison
* for this texture unit, don't enable shadow compare in
* the texture sampler state.
@@ -449,7 +452,8 @@ update_samplers(struct svga_context *svga, unsigned dirty )
/* Handle polygon stipple sampler texture */
if (svga->curr.rast->templ.poly_stipple_enable) {
- const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ const unsigned unit =
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
struct svga_sampler_state *sampler = svga->polygon_stipple.sampler;
assert(sampler);
diff --git a/src/gallium/drivers/svga/svga_state_tgsi_transform.c b/src/gallium/drivers/svga/svga_state_tgsi_transform.c
index 1dcc05cfaf0..e0b054acbcf 100644
--- a/src/gallium/drivers/svga/svga_state_tgsi_transform.c
+++ b/src/gallium/drivers/svga/svga_state_tgsi_transform.c
@@ -29,7 +29,10 @@
#include "util/u_simple_shaders.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_point_sprite.h"
+#include "tgsi/tgsi_dynamic_indexing.h"
+#include "tgsi/tgsi_vpos.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
#include "svga_context.h"
#include "svga_shader.h"
@@ -49,6 +52,171 @@ bind_gs_state(struct svga_context *svga,
}
+static void
+insert_at_head(struct svga_shader *head, struct svga_shader *shader)
+{
+ shader->parent = head;
+ shader->next = head->next;
+ head->next = shader;
+}
+
+
+/**
+ * Bind shader
+ */
+static void
+bind_shader(struct svga_context *svga,
+ const enum pipe_shader_type shader_type,
+ struct svga_shader *shader)
+{
+ switch (shader_type) {
+ case PIPE_SHADER_VERTEX:
+ svga->pipe.bind_vs_state(&svga->pipe, shader);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ /**
+ * Avoid pipe->bind_fs_state call because it goes through aapoint
+       * layer. We would lose the linked list of all transformed shaders
+       * if aapoint is used.
+ */
+ svga_bind_fs_state(&svga->pipe, shader);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ svga->pipe.bind_gs_state(&svga->pipe, shader);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ svga->pipe.bind_tcs_state(&svga->pipe, shader);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ svga->pipe.bind_tes_state(&svga->pipe, shader);
+ break;
+ default:
+ return;
+ }
+}
+
+
+
+/**
+ * Create shader
+ */
+static void *
+create_shader(struct svga_context *svga,
+ const enum pipe_shader_type shader_type,
+ struct pipe_shader_state *state)
+{
+ switch (shader_type) {
+ case PIPE_SHADER_VERTEX:
+ return svga->pipe.create_vs_state(&svga->pipe, state);
+ case PIPE_SHADER_FRAGMENT:
+ /**
+ * Avoid pipe->create_fs_state call because it goes through aapoint
+       * layer. We would lose the linked list of all transformed shaders
+       * if aapoint is used.
+ */
+ return svga_create_fs_state(&svga->pipe, state);
+ case PIPE_SHADER_GEOMETRY:
+ return svga->pipe.create_gs_state(&svga->pipe, state);
+ case PIPE_SHADER_TESS_CTRL:
+ return svga->pipe.create_tcs_state(&svga->pipe, state);
+ case PIPE_SHADER_TESS_EVAL:
+ return svga->pipe.create_tes_state(&svga->pipe, state);
+ default:
+ return NULL;
+ }
+}
+
+
+static void
+write_vpos(struct svga_context *svga,
+ struct svga_shader *shader)
+{
+ struct svga_token_key key;
+ boolean use_existing = FALSE;
+ struct svga_shader *transform_shader;
+ const struct tgsi_shader_info *info = &shader->info;
+
+ /* Create a token key */
+ memset(&key, 0, sizeof key);
+ key.vs.write_position = 1;
+
+ if (shader->next) {
+ transform_shader = svga_search_shader_token_key(shader->next, &key);
+ if (transform_shader) {
+ use_existing = TRUE;
+ }
+ }
+
+ if (!use_existing) {
+ struct pipe_shader_state state;
+ struct tgsi_token *new_tokens = NULL;
+
+ new_tokens = tgsi_write_vpos(shader->tokens,
+ info->immediate_count);
+ if (!new_tokens)
+ return;
+
+ pipe_shader_state_from_tgsi(&state, new_tokens);
+
+ transform_shader = create_shader(svga, info->processor, &state);
+ insert_at_head(shader, transform_shader);
+ FREE(new_tokens);
+ }
+ transform_shader->token_key = key;
+ bind_shader(svga, info->processor, transform_shader);
+}
+
+
+/**
+ * transform_dynamic_indexing searches the shader variant list to see if
+ * we already have a transformed shader for dynamic indexing and, if so,
+ * reuses and binds it. Otherwise it creates a new shader with the dynamic
+ * indexing removed, adds it to the shader variant list, and binds the new
+ * shader to the current svga state.
+ */
+static void
+transform_dynamic_indexing(struct svga_context *svga,
+ struct svga_shader *shader)
+{
+ struct svga_token_key key;
+ boolean use_existing = FALSE;
+ struct svga_shader *transform_shader;
+ const struct tgsi_shader_info *info = &shader->info;
+
+ /* Create a token key */
+ memset(&key, 0, sizeof key);
+ key.dynamic_indexing = 1;
+
+ if (shader->next) {
+ transform_shader = svga_search_shader_token_key(shader->next, &key);
+ if (transform_shader) {
+ use_existing = TRUE;
+ }
+ }
+
+ struct tgsi_token *new_tokens = NULL;
+
+ if (!use_existing) {
+ struct pipe_shader_state state;
+ new_tokens = tgsi_remove_dynamic_indexing(shader->tokens,
+ info->const_buffers_declared,
+ info->samplers_declared,
+ info->immediate_count);
+ if (!new_tokens)
+ return;
+
+ pipe_shader_state_from_tgsi(&state, new_tokens);
+
+ transform_shader = create_shader(svga, info->processor, &state);
+ insert_at_head(shader, transform_shader);
+ }
+ transform_shader->token_key = key;
+ bind_shader(svga, info->processor, transform_shader);
+ if (new_tokens)
+ FREE(new_tokens);
+}
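
Both write_vpos() and transform_dynamic_indexing() above follow the same caching discipline: search the shader's next chain for a derivative whose token key matches, otherwise transform the tokens, insert the new shader at the head of the chain, and bind it. A hedged sketch of that lookup-or-create flow, using hypothetical node and key types (illustrative only, not driver code):

/* Hypothetical sketch of the token-key caching used above: each original
 * shader keeps a chain of transformed derivatives (newest first) and a
 * derivative is reused when its key matches.
 */
struct xfrm_shader {
   struct xfrm_shader *parent;   /* the original shader */
   struct xfrm_shader *next;     /* older derivatives */
   unsigned key;                 /* stands in for struct svga_token_key */
};

static struct xfrm_shader *
find_or_create(struct xfrm_shader *orig, unsigned key,
               struct xfrm_shader *(*transform)(struct xfrm_shader *))
{
   struct xfrm_shader *s;

   for (s = orig->next; s; s = s->next) {
      if (s->key == key)
         return s;               /* reuse the cached derivative */
   }

   s = transform(orig);          /* build a new transformed shader */
   s->key = key;
   s->parent = orig;
   s->next = orig->next;         /* insert at head, as insert_at_head() does */
   orig->next = s;
   return s;
}
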
+
+
/**
 * emulate_point_sprite searches the shader variants list to see if there is
* a shader variant with a token string that matches the emulation
@@ -233,18 +401,49 @@ add_point_sprite_shader(struct svga_context *svga)
return &new_gs->base;
}
+
+static boolean
+has_dynamic_indexing(const struct tgsi_shader_info *info)
+{
+ return (info->dim_indirect_files & (1u << TGSI_FILE_CONSTANT)) ||
+ (info->indirect_files & (1u << TGSI_FILE_SAMPLER));
+}
+
+
/* update_tgsi_transform provides a hook to transform a shader if needed.
*/
static enum pipe_error
-update_tgsi_transform(struct svga_context *svga, unsigned dirty)
+update_tgsi_transform(struct svga_context *svga, uint64_t dirty)
{
struct svga_geometry_shader *gs = svga->curr.user_gs; /* current gs */
struct svga_vertex_shader *vs = svga->curr.vs; /* currently bound vs */
+ struct svga_fragment_shader *fs = svga->curr.fs; /* currently bound fs */
+ struct svga_tcs_shader *tcs = svga->curr.tcs; /* currently bound tcs */
+ struct svga_tes_shader *tes = svga->curr.tes; /* currently bound tes */
struct svga_shader *orig_gs; /* original gs */
struct svga_shader *new_gs; /* new gs */
- if (!svga_have_vgpu10(svga))
- return PIPE_OK;
+ assert(svga_have_vgpu10(svga));
+
+ if (vs->base.info.num_outputs == 0) {
+ write_vpos(svga, &vs->base);
+ }
+
+ if (vs && has_dynamic_indexing(&vs->base.info)) {
+ transform_dynamic_indexing(svga, &vs->base);
+ }
+ if (fs && has_dynamic_indexing(&fs->base.info)) {
+ transform_dynamic_indexing(svga, &fs->base);
+ }
+ if (gs && has_dynamic_indexing(&gs->base.info)) {
+ transform_dynamic_indexing(svga, &gs->base);
+ }
+ if (tcs && has_dynamic_indexing(&tcs->base.info)) {
+ transform_dynamic_indexing(svga, &tcs->base);
+ }
+ if (tes && has_dynamic_indexing(&tes->base.info)) {
+ transform_dynamic_indexing(svga, &tes->base);
+ }
if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
/* If the current prim type is POINTS and the current geometry shader
diff --git a/src/gallium/drivers/svga/svga_state_ts.c b/src/gallium/drivers/svga/svga_state_ts.c
new file mode 100644
index 00000000000..890d153c7d6
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_ts.c
@@ -0,0 +1,392 @@
+/**********************************************************
+ * Copyright 2018-2020 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+
+#include "svga_context.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+#include "svga_shader.h"
+
+
+/**
+ * Translate TGSI shader into an svga shader variant.
+ */
+static enum pipe_error
+compile_tcs(struct svga_context *svga,
+ struct svga_tcs_shader *tcs,
+ const struct svga_compile_key *key,
+ struct svga_shader_variant **out_variant)
+{
+ struct svga_shader_variant *variant;
+ enum pipe_error ret = PIPE_ERROR;
+
+ variant = svga_tgsi_vgpu10_translate(svga, &tcs->base, key,
+ PIPE_SHADER_TESS_CTRL);
+ if (!variant)
+ return PIPE_ERROR;
+
+ ret = svga_define_shader(svga, variant);
+ if (ret != PIPE_OK) {
+ svga_destroy_shader_variant(svga, variant);
+ return ret;
+ }
+
+ *out_variant = variant;
+
+ return PIPE_OK;
+}
+
+
+static void
+make_tcs_key(struct svga_context *svga, struct svga_compile_key *key)
+{
+ struct svga_tcs_shader *tcs = svga->curr.tcs;
+
+ memset(key, 0, sizeof *key);
+
+ /*
+ * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+ */
+ svga_init_shader_key_common(svga, PIPE_SHADER_TESS_CTRL, &tcs->base, key);
+
+ /* SVGA_NEW_TCS_PARAM */
+ key->tcs.vertices_per_patch = svga->curr.vertices_per_patch;
+
+ /* The tessellator parameters come from the layout section in the
+ * tessellation evaluation shader. Get these parameters from the
+ * current tessellation evaluation shader variant.
+ * Note: this requires the tessellation evaluation shader to be
+ * compiled first.
+ */
+ struct svga_tes_variant *tes = svga_tes_variant(svga->state.hw_draw.tes);
+ key->tcs.prim_mode = tes->prim_mode;
+ key->tcs.spacing = tes->spacing;
+ key->tcs.vertices_order_cw = tes->vertices_order_cw;
+ key->tcs.point_mode = tes->point_mode;
+
+ if (svga->tcs.passthrough)
+ key->tcs.passthrough = 1;
+
+ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+
+ /* tcs is always followed by tes */
+ key->last_vertex_stage = 0;
+}
+
+
+static enum pipe_error
+emit_hw_tcs(struct svga_context *svga, uint64_t dirty)
+{
+ struct svga_shader_variant *variant;
+ struct svga_tcs_shader *tcs = svga->curr.tcs;
+ enum pipe_error ret = PIPE_OK;
+ struct svga_compile_key key;
+
+ assert(svga_have_sm5(svga));
+
+ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITTCS);
+
+ if (!tcs) {
+ /* If there is no active tcs, then there should not be
+       * an active tes either
+ */
+ assert(!svga->curr.tes);
+ if (svga->state.hw_draw.tcs != NULL) {
+
+      /** The previous tessellation control shader is no longer active,
+       * so unbind the tessellation control shader.
+ */
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, NULL);
+ if (ret != PIPE_OK)
+ goto done;
+ svga->state.hw_draw.tcs = NULL;
+ }
+ goto done;
+ }
+
+ make_tcs_key(svga, &key);
+
+ /* See if we already have a TCS variant that matches the key */
+ variant = svga_search_shader_key(&tcs->base, &key);
+
+ if (!variant) {
+ ret = compile_tcs(svga, tcs, &key, &variant);
+ if (ret != PIPE_OK)
+ goto done;
+
+ /* insert the new variant at head of linked list */
+ assert(variant);
+ variant->next = tcs->base.variants;
+ tcs->base.variants = variant;
+ }
+
+ if (variant != svga->state.hw_draw.tcs) {
+ /* Bind the new variant */
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_HS, variant);
+ if (ret != PIPE_OK)
+ goto done;
+
+ svga->rebind.flags.tcs = FALSE;
+ svga->dirty |= SVGA_NEW_TCS_VARIANT;
+ svga->state.hw_draw.tcs = variant;
+ }
+
+done:
+ SVGA_STATS_TIME_POP(svga_sws(svga));
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_tcs =
+{
+ "tessellation control shader (hwtnl)",
+ (SVGA_NEW_VS |
+ SVGA_NEW_TCS |
+ SVGA_NEW_TES |
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_RAST),
+ emit_hw_tcs
+};
+
+
+/**
+ * Translate TGSI shader into an svga shader variant.
+ */
+static enum pipe_error
+compile_tes(struct svga_context *svga,
+ struct svga_tes_shader *tes,
+ const struct svga_compile_key *key,
+ struct svga_shader_variant **out_variant)
+{
+ struct svga_shader_variant *variant;
+ enum pipe_error ret = PIPE_ERROR;
+
+ variant = svga_tgsi_vgpu10_translate(svga, &tes->base, key,
+ PIPE_SHADER_TESS_EVAL);
+ if (!variant)
+ return PIPE_ERROR;
+
+ ret = svga_define_shader(svga, variant);
+ if (ret != PIPE_OK) {
+ svga_destroy_shader_variant(svga, variant);
+ return ret;
+ }
+
+ *out_variant = variant;
+
+ return PIPE_OK;
+}
+
+
+static void
+make_tes_key(struct svga_context *svga, struct svga_compile_key *key)
+{
+ struct svga_tes_shader *tes = svga->curr.tes;
+
+ memset(key, 0, sizeof *key);
+
+ /*
+ * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+ */
+ svga_init_shader_key_common(svga, PIPE_SHADER_TESS_EVAL, &tes->base, key);
+
+ assert(svga->curr.tcs);
+ key->tes.vertices_per_patch =
+ svga->curr.tcs->base.info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+
+ key->tes.need_prescale = svga->state.hw_clear.prescale[0].enabled &&
+ (svga->curr.gs == NULL);
+
+ /* tcs emits tessellation factors as extra outputs.
+ * Since tes depends on them, save the tessFactor output index
+ * from tcs in the tes compile key, so that if a different
+ * tcs is bound and if the tessFactor index is different,
+ * a different tes variant will be generated.
+ */
+ key->tes.tessfactor_index = svga->curr.tcs->base.info.num_outputs;
+
+ key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+
+ /* This is the last vertex stage if there is no geometry shader. */
+ key->last_vertex_stage = !svga->curr.gs;
+
+ key->tes.need_tessinner = 0;
+ key->tes.need_tessouter = 0;
+
+ for (int i = 0; i < svga->curr.tcs->base.info.num_outputs; i++) {
+ switch (svga->curr.tcs->base.info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_TESSOUTER:
+ key->tes.need_tessouter = 1;
+ break;
+ case TGSI_SEMANTIC_TESSINNER:
+ key->tes.need_tessinner = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+}
+
+
+static void
+get_passthrough_tcs(struct svga_context *svga)
+{
+ if (svga->tcs.passthrough_tcs &&
+ svga->tcs.vs == svga->curr.vs &&
+ svga->tcs.tes == svga->curr.tes &&
+ svga->tcs.vertices_per_patch == svga->curr.vertices_per_patch) {
+ svga->pipe.bind_tcs_state(&svga->pipe,
+ svga->tcs.passthrough_tcs);
+ }
+ else {
+ struct svga_tcs_shader *new_tcs;
+
+      /* delete the old passthrough shader */
+ if (svga->tcs.passthrough_tcs) {
+ svga->pipe.delete_tcs_state(&svga->pipe,
+ svga->tcs.passthrough_tcs);
+ }
+
+ new_tcs = (struct svga_tcs_shader *)
+ util_make_tess_ctrl_passthrough_shader(&svga->pipe,
+ svga->curr.vs->base.info.num_outputs,
+ svga->curr.tes->base.info.num_inputs,
+ svga->curr.vs->base.info.output_semantic_name,
+ svga->curr.vs->base.info.output_semantic_index,
+ svga->curr.tes->base.info.input_semantic_name,
+ svga->curr.tes->base.info.input_semantic_index,
+ svga->curr.vertices_per_patch);
+ svga->pipe.bind_tcs_state(&svga->pipe, new_tcs);
+ svga->tcs.passthrough_tcs = new_tcs;
+ svga->tcs.vs = svga->curr.vs;
+ svga->tcs.tes = svga->curr.tes;
+ svga->tcs.vertices_per_patch = svga->curr.vertices_per_patch;
+ }
+
+ struct pipe_constant_buffer cb;
+
+ cb.buffer = NULL;
+ cb.user_buffer = (void *) svga->curr.default_tesslevels;
+ cb.buffer_offset = 0;
+ cb.buffer_size = 2 * 4 * sizeof(float);
+ svga->pipe.set_constant_buffer(&svga->pipe, PIPE_SHADER_TESS_CTRL, 0, &cb);
+}
+
+
+static enum pipe_error
+emit_hw_tes(struct svga_context *svga, uint64_t dirty)
+{
+ struct svga_shader_variant *variant;
+ struct svga_tes_shader *tes = svga->curr.tes;
+ enum pipe_error ret = PIPE_OK;
+ struct svga_compile_key key;
+
+ assert(svga_have_sm5(svga));
+
+ SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITTES);
+
+ if (!tes) {
+ /* The GL spec implies that TES is optional when there's a TCS,
+ * but that's apparently a spec error. Assert if we have a TCS
+ * but no TES.
+ */
+ assert(!svga->curr.tcs);
+ if (svga->state.hw_draw.tes != NULL) {
+
+      /** The previous tessellation evaluation shader is no longer active,
+       * so unbind the tessellation evaluation shader.
+ */
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, NULL);
+ if (ret != PIPE_OK)
+ goto done;
+ svga->state.hw_draw.tes = NULL;
+ }
+ goto done;
+ }
+
+ if (!svga->curr.tcs) {
+ /* TES state is processed before the TCS
+ * shader and that's why we're checking for and creating the
+       * passthrough TCS in the emit_hw_tes() function.
+ */
+ get_passthrough_tcs(svga);
+ svga->tcs.passthrough = TRUE;
+ }
+ else {
+ svga->tcs.passthrough = FALSE;
+ }
+
+ make_tes_key(svga, &key);
+
+ /* See if we already have a TES variant that matches the key */
+ variant = svga_search_shader_key(&tes->base, &key);
+
+ if (!variant) {
+ ret = compile_tes(svga, tes, &key, &variant);
+ if (ret != PIPE_OK)
+ goto done;
+
+ /* insert the new variant at head of linked list */
+ assert(variant);
+ variant->next = tes->base.variants;
+ tes->base.variants = variant;
+ }
+
+ if (variant != svga->state.hw_draw.tes) {
+ /* Bind the new variant */
+ ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_DS, variant);
+ if (ret != PIPE_OK)
+ goto done;
+
+ svga->rebind.flags.tes = FALSE;
+ svga->dirty |= SVGA_NEW_TES_VARIANT;
+ svga->state.hw_draw.tes = variant;
+ }
+
+done:
+ SVGA_STATS_TIME_POP(svga_sws(svga));
+ return ret;
+}
+
+
+struct svga_tracked_state svga_hw_tes =
+{
+ "tessellation evaluation shader (hwtnl)",
+   /* TBD: whether SVGA_NEW_VS/SVGA_NEW_FS/SVGA_NEW_GS are required */
+ (SVGA_NEW_VS |
+ SVGA_NEW_FS |
+ SVGA_NEW_GS |
+ SVGA_NEW_TCS |
+ SVGA_NEW_TES |
+ SVGA_NEW_TEXTURE_BINDING |
+ SVGA_NEW_SAMPLER |
+ SVGA_NEW_RAST),
+ emit_hw_tes
+};
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 95b1a9e952d..75b0ac60f58 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -139,7 +139,7 @@ emit_tex_binding_unit(struct svga_context *svga,
static enum pipe_error
-update_tss_binding(struct svga_context *svga, unsigned dirty)
+update_tss_binding(struct svga_context *svga, uint64_t dirty )
{
const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT;
boolean reemit = svga->rebind.flags.texture_samplers;
@@ -149,8 +149,7 @@ update_tss_binding(struct svga_context *svga, unsigned dirty)
struct bind_queue queue;
- if (svga_have_vgpu10(svga))
- return PIPE_OK;
+ assert(!svga_have_vgpu10(svga));
queue.bind_count = 0;
@@ -167,7 +166,8 @@ update_tss_binding(struct svga_context *svga, unsigned dirty)
/* Polygon stipple */
if (svga->curr.rast->templ.poly_stipple_enable) {
- const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ const unsigned unit =
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
emit_tex_binding_unit(svga, unit,
svga->polygon_stipple.sampler,
&svga->polygon_stipple.sampler_view->base,
@@ -257,7 +257,8 @@ svga_reemit_tss_bindings(struct svga_context *svga)
/* Polygon stipple */
if (svga->curr.rast && svga->curr.rast->templ.poly_stipple_enable) {
- const unsigned unit = svga->state.hw_draw.fs->pstipple_sampler_unit;
+ const unsigned unit =
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit;
struct svga_hw_view_state *view = &svga->state.hw_draw.views[unit];
if (view->v) {
@@ -380,14 +381,13 @@ emit_tss_unit(struct svga_context *svga, unsigned unit,
}
static enum pipe_error
-update_tss(struct svga_context *svga, unsigned dirty)
+update_tss(struct svga_context *svga, uint64_t dirty )
{
const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT;
unsigned i;
struct ts_queue queue;
- if (svga_have_vgpu10(svga))
- return PIPE_OK;
+ assert(!svga_have_vgpu10(svga));
queue.ts_count = 0;
for (i = 0; i < svga->curr.num_samplers[shader]; i++) {
@@ -400,7 +400,7 @@ update_tss(struct svga_context *svga, unsigned dirty)
/* polygon stipple sampler */
if (svga->curr.rast->templ.poly_stipple_enable) {
emit_tss_unit(svga,
- svga->state.hw_draw.fs->pstipple_sampler_unit,
+ svga_fs_variant(svga->state.hw_draw.fs)->pstipple_sampler_unit,
svga->polygon_stipple.sampler,
&queue);
}
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
index fd6a238ef16..a49bcd0a263 100644
--- a/src/gallium/drivers/svga/svga_state_vdecl.c
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -40,7 +40,7 @@
static enum pipe_error
-emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
+emit_hw_vs_vdecl(struct svga_context *svga, uint64_t dirty)
{
const struct pipe_vertex_element *ve = svga->curr.velems->velem;
SVGA3dVertexDecl decls[SVGA3D_INPUTREG_MAX];
@@ -136,7 +136,7 @@ emit_hw_vs_vdecl(struct svga_context *svga, unsigned dirty)
static enum pipe_error
-emit_hw_vdecl(struct svga_context *svga, unsigned dirty)
+emit_hw_vdecl(struct svga_context *svga, uint64_t dirty)
{
/* SVGA_NEW_NEED_SWTNL
*/
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index d63b52454ca..147b07aaeb1 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -164,7 +164,7 @@ compile_vs(struct svga_context *svga,
static void
make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
{
- const enum pipe_shader_type shader = PIPE_SHADER_VERTEX;
+ struct svga_vertex_shader *vs = svga->curr.vs;
memset(key, 0, sizeof *key);
@@ -176,7 +176,8 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
}
/* SVGA_NEW_PRESCALE */
- key->vs.need_prescale = svga->state.hw_clear.prescale.enabled &&
+ key->vs.need_prescale = svga->state.hw_clear.prescale[0].enabled &&
+ (svga->curr.tes == NULL) &&
(svga->curr.gs == NULL);
/* SVGA_NEW_RAST */
@@ -199,10 +200,16 @@ make_vs_key(struct svga_context *svga, struct svga_compile_key *key)
key->vs.attrib_puint_to_sscaled = svga->curr.velems->attrib_puint_to_sscaled;
/* SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER */
- svga_init_shader_key_common(svga, shader, key);
+ svga_init_shader_key_common(svga, PIPE_SHADER_VERTEX, &vs->base, key);
/* SVGA_NEW_RAST */
key->clip_plane_enable = svga->curr.rast->templ.clip_plane_enable;
+
+ /* Determine if this shader is the last shader in the vertex
+ * processing stage.
+ */
+ key->last_vertex_stage = !(svga->curr.gs ||
+ svga->curr.tcs || svga->curr.tes);
}
@@ -338,7 +345,7 @@ compile_passthrough_vs(struct svga_context *svga,
static enum pipe_error
-emit_hw_vs(struct svga_context *svga, unsigned dirty)
+emit_hw_vs(struct svga_context *svga, uint64_t dirty)
{
struct svga_shader_variant *variant;
struct svga_vertex_shader *vs = svga->curr.vs;
diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h
index 1daa1ad5352..5e6db247b53 100644
--- a/src/gallium/drivers/svga/svga_streamout.h
+++ b/src/gallium/drivers/svga/svga_streamout.h
@@ -32,6 +32,9 @@ struct svga_stream_output {
struct pipe_stream_output_info info;
unsigned pos_out_index; // position output index
unsigned id;
+ unsigned streammask; // bitmask to specify which streams are enabled
+ unsigned buffer_stream;
+ struct svga_winsys_buffer *declBuf;
};
struct svga_stream_output *
@@ -50,4 +53,20 @@ svga_delete_stream_output(struct svga_context *svga,
enum pipe_error
svga_rebind_stream_output_targets(struct svga_context *svga);
+void
+svga_create_stream_output_queries(struct svga_context *svga);
+
+void
+svga_destroy_stream_output_queries(struct svga_context *svga);
+
+void
+svga_begin_stream_output_queries(struct svga_context *svga, unsigned mask);
+
+void
+svga_end_stream_output_queries(struct svga_context *svga, unsigned mask);
+
+unsigned
+svga_get_primcount_from_stream_output(struct svga_context *svga,
+ unsigned stream);
+
#endif /* SVGA_STREAMOUT_H */
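
A hypothetical usage sketch of the query helpers declared above; the call sites and the interpretation of the mask argument as a per-stream bitmask are assumptions here, not something this header spells out:

/* Assumed call pattern for the stream-output query helpers (illustrative
 * only): create the queries once per context, bracket draws that feed
 * stream output, and read back the primitive count when a later draw
 * needs to source its vertex count from transform feedback.
 */
static void
stream_output_query_example(struct svga_context *svga, unsigned streammask)
{
   svga_create_stream_output_queries(svga);        /* once per context */

   svga_begin_stream_output_queries(svga, streammask);
   /* ... emit draws that write the bound stream-output buffers ... */
   svga_end_stream_output_queries(svga, streammask);

   /* How many primitives did stream 0 actually write? */
   unsigned primcount = svga_get_primcount_from_stream_output(svga, 0);
   (void) primcount;

   svga_destroy_stream_output_queries(svga);       /* at context teardown */
}
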
diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c
index 2c48a66186f..d3dd23d2d81 100644
--- a/src/gallium/drivers/svga/svga_surface.c
+++ b/src/gallium/drivers/svga/svga_surface.c
@@ -578,6 +578,16 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
}
}
+ /**
+ * Create an alternate surface view for the specified context if the
+ * view was created for another context.
+ */
+ if (s && s->base.context != &svga->pipe) {
+ struct pipe_surface *surf;
+ surf = svga_create_surface_view(&svga->pipe, s->base.texture, &s->base, FALSE);
+ s = svga_surface(surf);
+ }
+
if (s && s->view_id == SVGA3D_INVALID_ID) {
SVGA3dResourceType resType;
SVGA3dRenderTargetViewDesc desc;
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index 587632d0eb6..1413e3a4b52 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -146,6 +146,8 @@ static inline SVGA3dResourceType
svga_resource_type(enum pipe_texture_target target)
{
switch (target) {
+ case PIPE_BUFFER:
+ return SVGA3D_RESOURCE_BUFFER;
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return SVGA3D_RESOURCE_TEXTURE1D;
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
index b6fd07fe346..3e8c90d8e1e 100644
--- a/src/gallium/drivers/svga/svga_swtnl_backend.c
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -90,11 +90,12 @@ svga_vbuf_render_allocate_vertices(struct vbuf_render *render,
if (!svga_render->vbuf) {
svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
- svga_render->vbuf = pipe_buffer_create(screen,
- PIPE_BIND_VERTEX_BUFFER,
- PIPE_USAGE_STREAM,
- svga_render->vbuf_size);
+ svga_render->vbuf = SVGA_TRY_PTR(pipe_buffer_create
+ (screen, PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
+ svga_render->vbuf_size));
if (!svga_render->vbuf) {
+ svga_retry_enter(svga);
svga_context_flush(svga, NULL);
assert(!svga_render->vbuf);
svga_render->vbuf = pipe_buffer_create(screen,
@@ -104,6 +105,7 @@ svga_vbuf_render_allocate_vertices(struct vbuf_render *render,
/* The buffer allocation may fail if we run out of memory.
* The draw module's vbuf code should handle that without crashing.
*/
+ svga_retry_exit(svga);
}
svga->swtnl.new_vdecl = TRUE;
@@ -267,7 +269,7 @@ svga_vbuf_submit_state(struct svga_vbuf_render *svga_render)
else {
svga_hwtnl_set_flatshade(svga->hwtnl,
svga->curr.rast->templ.flatshade ||
- svga->state.hw_draw.fs->uses_flat_interp,
+ svga_is_using_flat_shading(svga),
svga->curr.rast->templ.flatshade_first);
svga_hwtnl_set_fillmode(svga->hwtnl, svga->curr.rast->hw_fillmode);
@@ -286,10 +288,10 @@ svga_vbuf_render_draw_arrays(struct vbuf_render *render,
struct svga_context *svga = svga_render->svga;
unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
/ svga_render->vertex_size;
- enum pipe_error ret = PIPE_OK;
/* instancing will already have been resolved at this point by 'draw' */
const unsigned start_instance = 0;
const unsigned instance_count = 1;
+ boolean retried;
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_VBUFDRAWARRAYS);
@@ -301,17 +303,13 @@ svga_vbuf_render_draw_arrays(struct vbuf_render *render,
* redbook/polys.c
*/
svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
-
- ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias,
- nr, start_instance, instance_count);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim,
- start + bias, nr,
- start_instance, instance_count);
+ SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_arrays
+ (svga->hwtnl, svga_render->prim, start + bias,
+ nr, start_instance, instance_count, 0), retried);
+ if (retried) {
svga->swtnl.new_vbuf = TRUE;
- assert(ret == PIPE_OK);
}
+
SVGA_STATS_TIME_POP(svga_sws(svga));
}
@@ -325,7 +323,7 @@ svga_vbuf_render_draw_elements(struct vbuf_render *render,
struct svga_context *svga = svga_render->svga;
int bias = (svga_render->vbuf_offset - svga_render->vdecl_offset)
/ svga_render->vertex_size;
- boolean ret;
+ boolean retried;
/* instancing will already have been resolved at this point by 'draw' */
const struct pipe_draw_info info = {
.index_size = 2,
@@ -354,13 +352,12 @@ svga_vbuf_render_draw_elements(struct vbuf_render *render,
* redbook/polys.c
*/
svga_update_state_retry(svga, SVGA_STATE_HW_DRAW);
- ret = svga_hwtnl_draw_range_elements(svga->hwtnl, &info, nr_indices);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = svga_hwtnl_draw_range_elements(svga->hwtnl, &info, nr_indices);
+ SVGA_RETRY_CHECK(svga, svga_hwtnl_draw_range_elements(svga->hwtnl, &info,
+ nr_indices), retried);
+ if (retried) {
svga->swtnl.new_vbuf = TRUE;
- assert(ret == PIPE_OK);
}
+
SVGA_STATS_TIME_POP(svga_sws(svga));
}
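
The SVGA_RETRY and SVGA_RETRY_CHECK macros used in the hunks above replace the open-coded flush-and-retry blocks being deleted; their real definitions are not part of this excerpt. A plausible sketch of what such a wrapper does, stated as an assumption rather than the driver's actual macro:

/* Assumed shape of a flush-and-retry wrapper (not the actual SVGA_RETRY
 * definition): issue the command, and if it fails for lack of command
 * buffer space, flush the context and try exactly once more, reporting
 * whether a retry happened.
 */
#define EXAMPLE_RETRY_CHECK(svga, cmd, retried)              \
   do {                                                      \
      enum pipe_error ret_ = (cmd);                          \
      (retried) = FALSE;                                     \
      if (ret_ != PIPE_OK) {                                 \
         svga_context_flush((svga), NULL);                   \
         ret_ = (cmd);                                       \
         (retried) = TRUE;                                   \
         assert(ret_ == PIPE_OK);                            \
      }                                                      \
   } while (0)

#define EXAMPLE_RETRY(svga, cmd)                             \
   do {                                                      \
      boolean retried_;                                      \
      EXAMPLE_RETRY_CHECK(svga, cmd, retried_);              \
      (void) retried_;                                       \
   } while (0)
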
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 816fef1c4ea..789ed23e88b 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -51,7 +51,7 @@
static void
set_draw_viewport(struct svga_context *svga)
{
- struct pipe_viewport_state vp = svga->curr.viewport;
+ struct pipe_viewport_state vp = svga->curr.viewport[0];
float adjx = 0.0f;
float adjy = 0.0f;
@@ -98,7 +98,7 @@ set_draw_viewport(struct svga_context *svga)
}
static enum pipe_error
-update_swtnl_draw(struct svga_context *svga, unsigned dirty)
+update_swtnl_draw(struct svga_context *svga, uint64_t dirty)
{
SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SWTNLUPDATEDRAW);
@@ -191,7 +191,6 @@ svga_vdecl_to_input_element(struct svga_context *svga,
{
SVGA3dElementLayoutId id;
SVGA3dInputElementDesc elements[PIPE_MAX_ATTRIBS];
- enum pipe_error ret;
unsigned i;
assert(num_decls <= PIPE_MAX_ATTRIBS);
@@ -208,13 +207,8 @@ svga_vdecl_to_input_element(struct svga_context *svga,
id = util_bitmask_add(svga->input_element_object_id_bm);
- ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id, elements);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls,
- id, elements);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DefineElementLayout(svga->swc, num_decls, id,
+ elements));
return id;
}
@@ -306,22 +300,14 @@ svga_swtnl_update_vdecl(struct svga_context *svga)
any_change = memcmp(svga_render->vdecl, vdecl, sizeof(vdecl));
if (svga_have_vgpu10(svga)) {
- enum pipe_error ret;
-
if (!any_change && svga_render->layout_id != SVGA3D_INVALID_ID) {
goto done;
}
if (svga_render->layout_id != SVGA3D_INVALID_ID) {
/* destroy old */
- ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
- svga_render->layout_id);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_DestroyElementLayout(svga->swc,
- svga_render->layout_id);
- assert(ret == PIPE_OK);
- }
+ SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyElementLayout
+ (svga->swc, svga_render->layout_id));
/**
* reset current layout id state after the element layout is
@@ -340,14 +326,8 @@ svga_swtnl_update_vdecl(struct svga_context *svga)
/* bind new */
if (svga->state.hw_draw.layout_id != svga_render->layout_id) {
- ret = SVGA3D_vgpu10_SetInputLayout(svga->swc, svga_render->layout_id);
- if (ret != PIPE_OK) {
- svga_context_flush(svga, NULL);
- ret = SVGA3D_vgpu10_SetInputLayout(svga->swc,
- svga_render->layout_id);
- assert(ret == PIPE_OK);
- }
-
+ SVGA_RETRY(svga, SVGA3D_vgpu10_SetInputLayout(svga->swc,
+ svga_render->layout_id));
svga->state.hw_draw.layout_id = svga_render->layout_id;
}
}
@@ -366,7 +346,7 @@ done:
static enum pipe_error
-update_swtnl_vdecl(struct svga_context *svga, unsigned dirty)
+update_swtnl_vdecl(struct svga_context *svga, uint64_t dirty)
{
return svga_swtnl_update_vdecl(svga);
}
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 5c3afee3845..0f7597f6157 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -238,14 +238,18 @@ svga_tgsi_vgpu9_translate(struct svga_context *svga,
memcpy(&variant->key, key, sizeof(*key));
variant->id = UTIL_BITMASK_INVALID_INDEX;
- variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
-
- /* If there was exactly one write to a fragment shader output register
- * and it came from a constant buffer, we know all fragments will have
- * the same color (except for blending).
- */
- variant->constant_color_output =
- emit.constant_color_output && emit.num_output_writes == 1;
+ if (unit == PIPE_SHADER_FRAGMENT) {
+ struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
+
+ fs_variant->pstipple_sampler_unit = emit.pstipple_sampler_unit;
+
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ fs_variant->constant_color_output =
+ emit.constant_color_output && emit.num_output_writes == 1;
+ }
#if 0
if (!svga_shader_verify(variant->tokens, variant->nr_tokens) ||
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index e98601127f4..9c467cc7814 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -30,7 +30,7 @@
#include "svga3d_reg.h"
-#define MAX_VGPU10_ADDR_REGS 2
+#define MAX_VGPU10_ADDR_REGS 4
struct svga_compile_key;
struct svga_context;
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 099ede6017d..6e607cd0616 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -40,6 +40,7 @@
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
@@ -87,6 +88,100 @@ enum clipping_mode
};
+/* Shader signature info */
+struct svga_shader_signature
+{
+ SVGA3dDXShaderSignatureHeader header;
+ SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
+ SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
+ SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
+};
+
+static inline void
+set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
+ unsigned index,
+ SVGA3dDXSignatureSemanticName sgnName,
+ unsigned mask,
+ SVGA3dDXSignatureRegisterComponentType compType,
+ SVGA3dDXSignatureMinPrecision minPrecision)
+{
+ e->registerIndex = index;
+ e->semanticName = sgnName;
+ e->mask = mask;
+ e->componentType = compType;
+ e->minPrecision = minPrecision;
+};
+
+static const SVGA3dDXSignatureSemanticName
+tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
+ SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
+};
+
+
+/**
+ * Map tgsi semantic name to SVGA signature semantic name
+ */
+static inline SVGA3dDXSignatureSemanticName
+map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
+{
+ assert(name < TGSI_SEMANTIC_COUNT);
+
+ /* Do a few asserts here to spot check the mapping */
+ assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
+ SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
+ assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
+ SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
+ assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
+ SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
+
+ return tgsi_semantic_to_sgn_name[name];
+}
+
+
struct svga_shader_emitter_v10
{
/* The token output buffer */
@@ -100,12 +195,16 @@ struct svga_shader_emitter_v10
unsigned unit;
unsigned version; /**< Either 40 or 41 at this time */
+ unsigned cur_tgsi_token; /**< current tgsi token position */
unsigned inst_start_token;
boolean discard_instruction; /**< throw away current instruction? */
+ boolean reemit_instruction; /**< reemit current instruction */
+ boolean skip_instruction; /**< skip current instruction */
union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
+ double (*immediates_dbl)[2];
unsigned num_immediates; /**< Number of immediates emitted */
- unsigned common_immediate_pos[8]; /**< literals for common immediates */
+ unsigned common_immediate_pos[10]; /**< literals for common immediates */
unsigned num_common_immediates;
boolean immediates_emitted;
@@ -126,8 +225,11 @@ struct svga_shader_emitter_v10
/** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
struct {
unsigned arrayId, index;
+ boolean initialized;
} temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
+ unsigned initialize_temp_index;
+
/** Number of constants used by original shader for each constant buffer.
* The size should probably always match with that of svga_state.constbufs.
*/
@@ -139,6 +241,16 @@ struct svga_shader_emitter_v10
ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
+ /* Index Range declaration */
+ struct {
+ unsigned start_index;
+ unsigned count;
+ boolean required;
+ unsigned operandType;
+ unsigned size;
+ unsigned dim;
+ } index_range;
+
/* Address regs (really implemented with temps) */
unsigned num_address_regs;
unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
@@ -154,8 +266,12 @@ struct svga_shader_emitter_v10
unsigned out_index; /**< the real position output reg */
unsigned tmp_index; /**< the fake/temp position output reg */
unsigned so_index; /**< the non-adjusted position output reg */
+ unsigned prescale_cbuf_index; /* index to the const buf for prescale */
unsigned prescale_scale_index, prescale_trans_index;
- boolean need_prescale;
+      unsigned num_prescale; /* number of prescale factors in the const buf */
+ unsigned viewport_index;
+ unsigned need_prescale:1;
+ unsigned have_prescale:1;
} vposition;
/* For vertex shaders only */
@@ -183,13 +299,20 @@ struct svga_shader_emitter_v10
unsigned fragcoord_input_index; /**< real fragment position input reg */
unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
- /** Which texture units are doing shadow comparison in the FS code */
- unsigned shadow_compare_units;
-
unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */
unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
+
+ /** TGSI index of sample mask input sys value */
+ unsigned sample_mask_in_sys_index;
+
+ /** Which texture units are doing shadow comparison in the FS code */
+ unsigned shadow_compare_units;
+
+ /* layer */
+ unsigned layer_input_index; /**< TGSI index of layer */
+ unsigned layer_imm_index; /**< immediate for default layer 0 */
} fs;
/* For geometry shaders only */
@@ -199,8 +322,63 @@ struct svga_shader_emitter_v10
unsigned input_size; /**< size of input arrays */
unsigned prim_id_index; /**< primitive id register index */
unsigned max_out_vertices; /**< maximum number of output vertices */
+ unsigned invocations;
+ unsigned invocation_id_sys_index;
+
+ unsigned viewport_index_out_index;
+ unsigned viewport_index_tmp_index;
} gs;
+ /* For tessellation control shaders only */
+ struct {
+ unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */
+ unsigned imm_index; /**< immediate for tcs */
+ unsigned vertices_out;
+ unsigned invocation_id_sys_index; /**< invocation id */
+ unsigned invocation_id_tmp_index;
+ unsigned instruction_token_pos; /* token pos for the first instruction */
+ unsigned control_point_input_index; /* control point input register index */
+ unsigned control_point_addr_index; /* control point input address register */
+ unsigned control_point_out_index; /* control point output register index */
+ unsigned control_point_tmp_index; /* control point temporary register */
+ unsigned control_point_out_count; /* control point output count */
+ boolean control_point_phase; /* true if in control point phase */
+ unsigned patch_generic_out_count; /* per-patch generic output count */
+ unsigned patch_generic_out_index; /* per-patch generic output register index*/
+ unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/
+ unsigned prim_id_index; /* primitive id */
+ struct {
+ unsigned out_index; /* real tessinner output register */
+ unsigned temp_index; /* tessinner temp register */
+ unsigned tgsi_index; /* tgsi tessinner output register */
+ } inner;
+ struct {
+ unsigned out_index; /* real tessouter output register */
+ unsigned temp_index; /* tessouter temp register */
+ unsigned tgsi_index; /* tgsi tessouter output register */
+ } outer;
+ } tcs;
+
+ /* For tessellation evaluation shaders only */
+ struct {
+ enum pipe_prim_type prim_mode;
+ enum pipe_tess_spacing spacing;
+ boolean vertices_order_cw;
+ boolean point_mode;
+ unsigned tesscoord_sys_index;
+ unsigned prim_id_index; /* primitive id */
+ struct {
+ unsigned in_index; /* real tessinner input register */
+ unsigned temp_index; /* tessinner temp register */
+ unsigned tgsi_index; /* tgsi tessinner input register */
+ } inner;
+ struct {
+ unsigned in_index; /* real tessouter input register */
+ unsigned temp_index; /* tessouter temp register */
+ unsigned tgsi_index; /* tgsi tessouter input register */
+ } outer;
+ } tes;
+
/* For vertex or geometry shaders */
enum clipping_mode clip_mode;
unsigned clip_dist_out_index; /**< clip distance output register index */
@@ -219,19 +397,41 @@ struct svga_shader_emitter_v10
boolean uses_flat_interp;
+ unsigned reserved_token; /* index to the reserved token */
+ boolean uses_precise_qualifier;
+
/* For all shaders: const reg index for RECT coord scaling */
unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
/* For all shaders: const reg index for texture buffer size */
unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
- /* VS/GS/FS Linkage info */
+ /* VS/TCS/TES/GS/FS Linkage info */
struct shader_linkage linkage;
+ /* Shader signature */
+ struct svga_shader_signature signature;
+
bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
+
+ /* For pipe_debug_message */
+ struct pipe_debug_callback svga_debug_callback;
+
+ /* current loop depth in shader */
+ unsigned current_loop_depth;
};
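The temp_map table above maps each flat TGSI temporary onto a VGPU10 indexable temp array slot (an arrayId plus an element index). A minimal standalone sketch of that mapping idea; the struct and function names here are illustrative, not the driver's:

/* Illustrative only: temps belonging to a declared array get a non-zero
 * array id and an element offset; plain temps keep array id 0.
 */
struct temp_slot {
   unsigned array_id;   /* which x#[] indexable array, 0 = plain temp */
   unsigned element;    /* element within that array */
};

static void
assign_temp_array(struct temp_slot *map, unsigned first, unsigned count,
                  unsigned array_id)
{
   for (unsigned i = 0; i < count; i++) {
      map[first + i].array_id = array_id;
      map[first + i].element = i;
   }
}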
+static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
+static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
+static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
+static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
+static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
+static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
+static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
+static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
+static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
+
static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);
@@ -239,6 +439,26 @@ static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst);
+static boolean
+emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
+ unsigned inst_number,
+ const struct tgsi_full_instruction *inst);
+
+static void
+emit_input_declaration(struct svga_shader_emitter_v10 *emit,
+ unsigned opcodeType, unsigned operandType,
+ unsigned dim, unsigned index, unsigned size,
+ unsigned name, unsigned numComp,
+ unsigned selMode, unsigned usageMask,
+ unsigned interpMode,
+ boolean addSignature,
+ SVGA3dDXSignatureSemanticName sgnName);
+
+static void
+create_temp_array(struct svga_shader_emitter_v10 *emit,
+ unsigned arrayID, unsigned first, unsigned count,
+ unsigned startIndex);
+
static char err_buf[128];
static boolean
@@ -381,7 +601,11 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
(emit->unit == PIPE_SHADER_GEOMETRY &&
index >= VGPU10_MAX_GS_INPUTS) ||
(emit->unit == PIPE_SHADER_FRAGMENT &&
- index >= VGPU10_MAX_FS_INPUTS)) {
+ index >= VGPU10_MAX_FS_INPUTS) ||
+ (emit->unit == PIPE_SHADER_TESS_CTRL &&
+ index >= VGPU11_MAX_HS_INPUTS) ||
+ (emit->unit == PIPE_SHADER_TESS_EVAL &&
+ index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
emit->register_overflow = TRUE;
}
break;
@@ -389,12 +613,22 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
case VGPU10_OPCODE_DCL_OUTPUT:
case VGPU10_OPCODE_DCL_OUTPUT_SGV:
case VGPU10_OPCODE_DCL_OUTPUT_SIV:
+   /* Note: we reserve two extra output indices in the tcs for the
+    * tessinner/outer levels, so an output index may go beyond
+    * VGPU11_MAX_HS_OUTPUTS. The declared output count is never
+    * exceeded; the index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
+    */
if ((emit->unit == PIPE_SHADER_VERTEX &&
index >= VGPU10_MAX_VS_OUTPUTS) ||
(emit->unit == PIPE_SHADER_GEOMETRY &&
index >= VGPU10_MAX_GS_OUTPUTS) ||
(emit->unit == PIPE_SHADER_FRAGMENT &&
- index >= VGPU10_MAX_FS_OUTPUTS)) {
+ index >= VGPU10_MAX_FS_OUTPUTS) ||
+ (emit->unit == PIPE_SHADER_TESS_CTRL &&
+ index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
+ (emit->unit == PIPE_SHADER_TESS_EVAL &&
+ index >= VGPU11_MAX_DS_OUTPUTS)) {
emit->register_overflow = TRUE;
}
break;
@@ -436,13 +670,33 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
+   /* num_written_clipdistance in the shader info is always 0 for a
+    * tessellation control shader because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
+    * is not defined for that stage. So we go through all the output
+    * declarations to compute num_written_clipdistance; this is only used
+    * to determine the clipping mode.
+    */
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ unsigned i;
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
+ emit->info.num_written_clipdistance =
+ 4 * (emit->info.output_semantic_index[i] + 1);
+ }
+ }
+ }
+
if (emit->info.num_written_clipdistance > 0) {
emit->clip_mode = CLIP_DISTANCE;
}
else if (emit->info.writes_clipvertex) {
emit->clip_mode = CLIP_VERTEX;
}
- else if (emit->key.clip_plane_enable) {
+ else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
+ /*
+ * Only the last shader in the vertex processing stage needs to
+ * handle the legacy clip mode.
+ */
emit->clip_mode = CLIP_LEGACY;
}
else {
@@ -497,6 +751,12 @@ translate_shader_type(unsigned type)
return VGPU10_GEOMETRY_SHADER;
case PIPE_SHADER_FRAGMENT:
return VGPU10_PIXEL_SHADER;
+ case PIPE_SHADER_TESS_CTRL:
+ return VGPU10_HULL_SHADER;
+ case PIPE_SHADER_TESS_EVAL:
+ return VGPU10_DOMAIN_SHADER;
+ case PIPE_SHADER_COMPUTE:
+ return VGPU10_COMPUTE_SHADER;
default:
assert(!"Unexpected shader type");
return VGPU10_VERTEX_SHADER;
@@ -550,7 +810,7 @@ translate_opcode(enum tgsi_opcode opcode)
case TGSI_OPCODE_DIV:
return VGPU10_OPCODE_DIV;
case TGSI_OPCODE_IDIV:
- return VGPU10_OPCODE_IDIV;
+ return VGPU10_OPCODE_VMWARE;
case TGSI_OPCODE_DP2:
return VGPU10_OPCODE_DP2;
case TGSI_OPCODE_BRK:
@@ -652,6 +912,64 @@ translate_opcode(enum tgsi_opcode opcode)
return VGPU10_OPCODE_LT;
case TGSI_OPCODE_ROUND:
return VGPU10_OPCODE_ROUND_NE;
+ /* Begin SM5 opcodes */
+ case TGSI_OPCODE_F2D:
+ return VGPU10_OPCODE_FTOD;
+ case TGSI_OPCODE_D2F:
+ return VGPU10_OPCODE_DTOF;
+ case TGSI_OPCODE_DMUL:
+ return VGPU10_OPCODE_DMUL;
+ case TGSI_OPCODE_DADD:
+ return VGPU10_OPCODE_DADD;
+ case TGSI_OPCODE_DMAX:
+ return VGPU10_OPCODE_DMAX;
+ case TGSI_OPCODE_DMIN:
+ return VGPU10_OPCODE_DMIN;
+ case TGSI_OPCODE_DSEQ:
+ return VGPU10_OPCODE_DEQ;
+ case TGSI_OPCODE_DSGE:
+ return VGPU10_OPCODE_DGE;
+ case TGSI_OPCODE_DSLT:
+ return VGPU10_OPCODE_DLT;
+ case TGSI_OPCODE_DSNE:
+ return VGPU10_OPCODE_DNE;
+ case TGSI_OPCODE_IBFE:
+ return VGPU10_OPCODE_IBFE;
+ case TGSI_OPCODE_UBFE:
+ return VGPU10_OPCODE_UBFE;
+ case TGSI_OPCODE_BFI:
+ return VGPU10_OPCODE_BFI;
+ case TGSI_OPCODE_BREV:
+ return VGPU10_OPCODE_BFREV;
+ case TGSI_OPCODE_POPC:
+ return VGPU10_OPCODE_COUNTBITS;
+ case TGSI_OPCODE_LSB:
+ return VGPU10_OPCODE_FIRSTBIT_LO;
+ case TGSI_OPCODE_IMSB:
+ return VGPU10_OPCODE_FIRSTBIT_SHI;
+ case TGSI_OPCODE_UMSB:
+ return VGPU10_OPCODE_FIRSTBIT_HI;
+ case TGSI_OPCODE_INTERP_CENTROID:
+ return VGPU10_OPCODE_EVAL_CENTROID;
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
+ case TGSI_OPCODE_BARRIER:
+ return VGPU10_OPCODE_SYNC;
+
+ /* DX11.1 Opcodes */
+ case TGSI_OPCODE_DDIV:
+ return VGPU10_OPCODE_DDIV;
+ case TGSI_OPCODE_DRCP:
+ return VGPU10_OPCODE_DRCP;
+ case TGSI_OPCODE_D2I:
+ return VGPU10_OPCODE_DTOI;
+ case TGSI_OPCODE_D2U:
+ return VGPU10_OPCODE_DTOU;
+ case TGSI_OPCODE_I2D:
+ return VGPU10_OPCODE_ITOD;
+ case TGSI_OPCODE_U2D:
+ return VGPU10_OPCODE_UTOD;
+
case TGSI_OPCODE_SAMPLE_POS:
/* Note: we never actually get this opcode because there's no GLSL
* function to query multisample resource sample positions. There's
@@ -761,66 +1079,60 @@ remap_temp_index(const struct svga_shader_emitter_v10 *emit,
/**
* Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
* Note: the operandType field must already be initialized.
+ * \param file the register file being accessed
+ * \param indirect using indirect addressing of the register file?
+ * \param index2D if true, 2-D indexing is being used (const or temp registers)
+ * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
*/
static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
VGPU10OperandToken0 operand0,
enum tgsi_file_type file,
- boolean indirect, boolean index2D,
- unsigned tempArrayID)
+ boolean indirect,
+ boolean index2D, bool indirect2D)
{
- unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+ VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
+ VGPU10_OPERAND_INDEX_DIMENSION indexDim;
/*
* Compute index dimensions
*/
if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
- operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
+ operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
+ operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
+ operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
+ operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
+ operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
/* there's no swizzle for in-line immediates */
indexDim = VGPU10_OPERAND_INDEX_0D;
assert(operand0.selectionMode == 0);
}
+ else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
+ indexDim = VGPU10_OPERAND_INDEX_0D;
+ }
else {
- if (index2D ||
- tempArrayID > 0 ||
- operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
- indexDim = VGPU10_OPERAND_INDEX_2D;
- }
- else {
- indexDim = VGPU10_OPERAND_INDEX_1D;
- }
+ indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
}
/*
- * Compute index representations (immediate, relative, etc).
+ * Compute index representation(s) (immediate vs relative).
*/
- if (tempArrayID > 0) {
- assert(file == TGSI_FILE_TEMPORARY);
- /* First index is the array ID, second index is the array element */
- index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
- if (indirect) {
- index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
- }
- else {
- index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
- }
+ if (indexDim == VGPU10_OPERAND_INDEX_2D) {
+ index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
+ : VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
+ : VGPU10_OPERAND_INDEX_IMMEDIATE32;
}
- else if (indirect) {
- if (file == TGSI_FILE_CONSTANT) {
- /* index[0] indicates which constant buffer while index[1] indicates
- * the position in the constant buffer.
- */
- index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
- index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
- }
- else {
- /* All other register files are 1-dimensional */
- index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
- }
+ else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
+ index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
+ : VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ index1Rep = 0;
}
else {
- index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
- index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+ index0Rep = 0;
+ index1Rep = 0;
}
operand0.indexDimension = indexDim;
@@ -879,13 +1191,18 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
const unsigned sem_index = emit->info.output_semantic_index[index];
unsigned writemask = reg->Register.WriteMask;
const boolean indirect = reg->Register.Indirect;
- const unsigned tempArrayId = get_temp_array_id(emit, file, index);
- const boolean index2d = reg->Register.Dimension;
+ unsigned tempArrayId = get_temp_array_id(emit, file, index);
+ boolean index2d = reg->Register.Dimension || tempArrayId > 0;
VGPU10OperandToken0 operand0;
+ if (file == TGSI_FILE_TEMPORARY) {
+ emit->temp_map[index].initialized = TRUE;
+ }
+
if (file == TGSI_FILE_OUTPUT) {
if (emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY) {
+ emit->unit == PIPE_SHADER_GEOMETRY ||
+ emit->unit == PIPE_SHADER_TESS_EVAL) {
if (index == emit->vposition.out_index &&
emit->vposition.tmp_index != INVALID_INDEX) {
/* replace OUTPUT[POS] with TEMP[POS]. We need to store the
@@ -913,6 +1230,21 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
file = TGSI_FILE_TEMPORARY;
index = emit->clip_vertex_tmp_index;
}
+ else if (sem_name == TGSI_SEMANTIC_COLOR &&
+ emit->key.clamp_vertex_color) {
+
+ /* set the saturate modifier of the instruction
+ * to clamp the vertex color.
+ */
+ VGPU10OpcodeToken0 *token =
+ (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
+ token->saturate = TRUE;
+ }
+ else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
+ emit->gs.viewport_index_out_index != INVALID_INDEX) {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->gs.viewport_index_tmp_index;
+ }
}
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
if (sem_name == TGSI_SEMANTIC_POSITION) {
@@ -955,6 +1287,116 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
emit->num_output_writes++;
}
}
+ else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ if (index == emit->tcs.inner.tgsi_index) {
+         /* replace OUTPUT[TESSLEVEL] with a temp. We store the value in a
+          * temporary for now; it will be stored into the appropriate
+          * registers in post_helper() during the patch constant phase.
+          */
+ if (emit->tcs.control_point_phase) {
+ /* Discard writing into tessfactor in control point phase */
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tcs.inner.temp_index;
+ }
+ }
+ else if (index == emit->tcs.outer.tgsi_index) {
+         /* replace OUTPUT[TESSLEVEL] with a temp. We store the value in a
+          * temporary for now; it will be stored into the appropriate
+          * registers in post_helper().
+          */
+ if (emit->tcs.control_point_phase) {
+ /* Discard writing into tessfactor in control point phase */
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tcs.outer.temp_index;
+ }
+ }
+ else if (index >= emit->tcs.patch_generic_out_index &&
+ index < (emit->tcs.patch_generic_out_index +
+ emit->tcs.patch_generic_out_count)) {
+ if (emit->tcs.control_point_phase) {
+ /* Discard writing into generic patch constant outputs in
+ control point phase */
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ if (emit->reemit_instruction) {
+ /* Store results of reemitted instruction in temporary register. */
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tcs.patch_generic_tmp_index +
+ (index - emit->tcs.patch_generic_out_index);
+ /**
+ * Temporaries for patch constant data can be done
+ * as indexable temporaries.
+ */
+ tempArrayId = get_temp_array_id(emit, file, index);
+ index2d = tempArrayId > 0;
+
+ emit->reemit_instruction = FALSE;
+ }
+ else {
+               /* If per-patch outputs are read in the shader, we reemit
+                * the instruction and store its results in temporaries in
+                * the patch constant phase. */
+ if (emit->info.reads_perpatch_outputs) {
+ emit->reemit_instruction = TRUE;
+ }
+ }
+ }
+ }
+ else if (reg->Register.Dimension) {
+ /* Only control point outputs are declared 2D in tgsi */
+ if (emit->tcs.control_point_phase) {
+ if (emit->reemit_instruction) {
+ /* Store results of reemitted instruction in temporary register. */
+ index2d = FALSE;
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tcs.control_point_tmp_index +
+ (index - emit->tcs.control_point_out_index);
+ emit->reemit_instruction = FALSE;
+ }
+ else {
+ /* The mapped control point outputs are 1-D */
+ index2d = FALSE;
+ if (emit->info.reads_pervertex_outputs) {
+                  /* If per-vertex outputs are read in the shader, we reemit
+                   * the instruction and store its results in temporaries in
+                   * the control point phase. */
+ emit->reemit_instruction = TRUE;
+ }
+ }
+
+ if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
+ emit->clip_dist_tmp_index != INVALID_INDEX) {
+ /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
+ * We store the clip distance in a temporary first, then
+ * we'll copy it to the shadow copy and to CLIPDIST with the
+ * enabled planes mask in emit_clip_distance_instructions().
+ */
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->clip_dist_tmp_index + sem_index;
+ }
+ else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
+ emit->clip_vertex_tmp_index != INVALID_INDEX) {
+ /* replace the CLIPVERTEX output register with a temporary */
+ assert(emit->clip_mode == CLIP_VERTEX);
+ assert(sem_index == 0);
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->clip_vertex_tmp_index;
+ }
+ }
+ else {
+ /* Discard writing into control point outputs in
+ patch constant phase */
+ emit->discard_instruction = TRUE;
+ }
+ }
+ }
}
/* init operand tokens to all zero */
@@ -977,7 +1419,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
check_register_index(emit, operand0.operandType, index);
operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
- index2d, tempArrayId);
+ index2d, FALSE);
/* Emit tokens */
emit_dword(emit, operand0.value);
@@ -994,6 +1436,28 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
/**
+ * Check if a temporary register needs to be initialized. We only report
+ * this when the shader does not use indirect addressing for temporaries
+ * and we are not inside a loop; with indirect addressing or inside a loop
+ * we cannot determine whether the temporary is initialized or not.
+ */
+static boolean
+need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
+ unsigned index)
+{
+ if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
+ && emit->current_loop_depth == 0) {
+ if (!emit->temp_map[index].initialized &&
+ emit->temp_map[index].index < emit->num_shader_temps) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+
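A self-contained sketch of the bookkeeping behind need_temp_reg_initialization: while instructions are emitted in order, record which temps have been written and flag a read of a never-written temp, but only when the shader uses no indirect temp addressing and the read happens outside any loop. The names and the size limit below are illustrative:

#include <stdbool.h>

#define MAX_TEMPS 256

struct temp_tracker {
   bool written[MAX_TEMPS];
   bool needs_init[MAX_TEMPS];
   unsigned loop_depth;      /* current loop nesting while scanning */
   bool indirect_temps;      /* shader indexes temps indirectly */
};

/* Call for every temp read; outside loops and without indirect
 * addressing a read-before-write can be detected statically.
 */
static void
note_temp_read(struct temp_tracker *t, unsigned index)
{
   if (!t->indirect_temps && t->loop_depth == 0 &&
       index < MAX_TEMPS && !t->written[index])
      t->needs_init[index] = true;
}

/* Call for every temp write. */
static void
note_temp_write(struct temp_tracker *t, unsigned index)
{
   if (index < MAX_TEMPS)
      t->written[index] = true;
}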
+/**
* Translate a src register of a TGSI instruction and emit VGPU10 tokens.
* In quite a few cases, we do register substitution. For example, if
* the TGSI register is the front/back-face register, we replace that with
@@ -1006,19 +1470,23 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
enum tgsi_file_type file = reg->Register.File;
unsigned index = reg->Register.Index;
const boolean indirect = reg->Register.Indirect;
- const unsigned tempArrayId = get_temp_array_id(emit, file, index);
- const boolean index2d = reg->Register.Dimension;
- const unsigned swizzleX = reg->Register.SwizzleX;
- const unsigned swizzleY = reg->Register.SwizzleY;
- const unsigned swizzleZ = reg->Register.SwizzleZ;
- const unsigned swizzleW = reg->Register.SwizzleW;
+ unsigned tempArrayId = get_temp_array_id(emit, file, index);
+ boolean index2d = (reg->Register.Dimension ||
+ tempArrayId > 0 ||
+ file == TGSI_FILE_CONSTANT);
+ unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
+ boolean indirect2d = reg->Dimension.Indirect;
+ unsigned swizzleX = reg->Register.SwizzleX;
+ unsigned swizzleY = reg->Register.SwizzleY;
+ unsigned swizzleZ = reg->Register.SwizzleZ;
+ unsigned swizzleW = reg->Register.SwizzleW;
const boolean absolute = reg->Register.Absolute;
const boolean negate = reg->Register.Negate;
- bool is_prim_id = FALSE;
-
VGPU10OperandToken0 operand0;
VGPU10OperandToken1 operand1;
+ operand0.value = operand1.value = 0;
+
if (emit->unit == PIPE_SHADER_FRAGMENT){
if (file == TGSI_FILE_INPUT) {
if (index == emit->fs.face_input_index) {
@@ -1031,6 +1499,12 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
file = TGSI_FILE_TEMPORARY;
index = emit->fs.fragcoord_tmp_index;
}
+ else if (index == emit->fs.layer_input_index) {
+ /* Replace INPUT[LAYER] with zero.x */
+ file = TGSI_FILE_IMMEDIATE;
+ index = emit->fs.layer_imm_index;
+ swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
+ }
else {
/* We remap fragment shader inputs to that FS input indexes
* match up with VS/GS output indexes.
@@ -1045,6 +1519,23 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
file = TGSI_FILE_TEMPORARY;
index = emit->fs.sample_pos_tmp_index;
}
+ else if (index == emit->fs.sample_mask_in_sys_index) {
+ /* Emitted as vCoverage0.x */
+ /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
+ * elements where s is the maximum number of color samples supported
+          * by the implementation. With the current implementation we should
+          * never have more than one element, so assert that Index == 0.
+ */
+ assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
+ reg->Register.Indirect);
+ operand0.value = 0;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
+ operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
+ emit_dword(emit, operand0.value);
+ return;
+ }
else {
/* Map the TGSI system value to a VGPU10 input register */
assert(index < ARRAY_SIZE(emit->system_value_indexes));
@@ -1055,9 +1546,19 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
}
else if (emit->unit == PIPE_SHADER_GEOMETRY) {
if (file == TGSI_FILE_INPUT) {
- is_prim_id = (index == emit->gs.prim_id_index);
+ if (index == emit->gs.prim_id_index) {
+ operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ }
index = emit->linkage.input_map[index];
}
+ else if (file == TGSI_FILE_SYSTEM_VALUE &&
+ index == emit->gs.invocation_id_sys_index) {
+ /* Emitted as vGSInstanceID0.x */
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
+ index = 0;
+ }
}
else if (emit->unit == PIPE_SHADER_VERTEX) {
if (file == TGSI_FILE_INPUT) {
@@ -1080,23 +1581,178 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
index = emit->system_value_indexes[index];
}
}
+ else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
- operand0.value = operand1.value = 0;
+ if (file == TGSI_FILE_SYSTEM_VALUE) {
+ if (index == emit->tcs.vertices_per_patch_index) {
+ /**
+ * if source register is the system value for vertices_per_patch,
+ * replace it with the immediate.
+ */
+ file = TGSI_FILE_IMMEDIATE;
+ index = emit->tcs.imm_index;
+ swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
+ }
+ else if (index == emit->tcs.invocation_id_sys_index) {
+ if (emit->tcs.control_point_phase) {
+ /**
+ * Emitted as vOutputControlPointID.x
+ */
+ operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
+ index = 0;
+ }
+ else {
+ /* There is no control point ID input declaration in
+ * the patch constant phase in hull shader.
+ * Since for now we are emitting all instructions in
+ * the patch constant phase, we are replacing the
+ * control point ID reference with the immediate 0.
+ */
+ file = TGSI_FILE_IMMEDIATE;
+ index = emit->tcs.imm_index;
+ swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
+ }
+ }
+ else if (index == emit->tcs.prim_id_index) {
+ /**
+ * Emitted as vPrim.x
+ */
+ operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ index = 0;
+ }
+ }
+ else if (file == TGSI_FILE_INPUT) {
+ index = emit->linkage.input_map[index];
+ if (!emit->tcs.control_point_phase) {
+ /* Emitted as vicp */
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
+ assert(reg->Register.Dimension);
+ }
+ }
+ else if (file == TGSI_FILE_OUTPUT) {
+ if ((index >= emit->tcs.patch_generic_out_index &&
+ index < (emit->tcs.patch_generic_out_index +
+ emit->tcs.patch_generic_out_count)) ||
+ index == emit->tcs.inner.tgsi_index ||
+ index == emit->tcs.outer.tgsi_index) {
+ if (emit->tcs.control_point_phase) {
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ /* Device doesn't allow reading from output so
+ * use corresponding temporary register as source */
+ file = TGSI_FILE_TEMPORARY;
+ if (index == emit->tcs.inner.tgsi_index) {
+ index = emit->tcs.inner.temp_index;
+ }
+ else if (index == emit->tcs.outer.tgsi_index) {
+ index = emit->tcs.outer.temp_index;
+ }
+ else {
+ index = emit->tcs.patch_generic_tmp_index +
+ (index - emit->tcs.patch_generic_out_index);
+ }
- if (is_prim_id) {
- /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
- * our virtual GPU accepts this as-is.
- */
- operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
- operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ /**
+ * Temporaries for patch constant data can be done
+ * as indexable temporaries.
+ */
+ tempArrayId = get_temp_array_id(emit, file, index);
+ index2d = tempArrayId > 0;
+ index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
+ }
+ }
+ else if (index2d) {
+ if (emit->tcs.control_point_phase) {
+ /* Device doesn't allow reading from output so
+ * use corresponding temporary register as source */
+ file = TGSI_FILE_TEMPORARY;
+ index2d = FALSE;
+ index = emit->tcs.control_point_tmp_index +
+ (index - emit->tcs.control_point_out_index);
+ }
+ else {
+ emit->discard_instruction = TRUE;
+ }
+ }
+ }
}
- else {
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ if (file == TGSI_FILE_SYSTEM_VALUE) {
+ if (index == emit->tes.tesscoord_sys_index) {
+ /**
+ * Emitted as vDomain
+ */
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
+ index = 0;
+ }
+ else if (index == emit->tes.inner.tgsi_index) {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tes.inner.temp_index;
+ }
+ else if (index == emit->tes.outer.tgsi_index) {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->tes.outer.temp_index;
+ }
+ else if (index == emit->tes.prim_id_index) {
+ /**
+ * Emitted as vPrim.x
+ */
+ operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ index = 0;
+ }
+
+ }
+ else if (file == TGSI_FILE_INPUT) {
+ if (index2d) {
+ /* 2D input is emitted as vcp (input control point). */
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+
+ /* index specifies the element index and is remapped
+ * to align with the tcs output index.
+ */
+ index = emit->linkage.input_map[index];
+ }
+ else {
+ if (index < emit->key.tes.tessfactor_index)
+ /* index specifies the generic patch index.
+ * Remapped to match up with the tcs output index.
+ */
+ index = emit->linkage.input_map[index];
+
+ operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ }
+ }
+ }
+
+ if (file == TGSI_FILE_ADDRESS) {
+ index = emit->address_reg_index[index];
+ file = TGSI_FILE_TEMPORARY;
+ }
+
+ if (file == TGSI_FILE_TEMPORARY) {
+ if (need_temp_reg_initialization(emit, index)) {
+ emit->initialize_temp_index = index;
+ emit->discard_instruction = TRUE;
+ }
+ }
+
+ if (operand0.value == 0) {
+ /* if operand0 was not set above for a special case, do the general
+ * case now.
+ */
operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
operand0.operandType = translate_register_file(file, tempArrayId > 0);
}
-
operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
- index2d, tempArrayId);
+ index2d, indirect2d);
if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
@@ -1149,13 +1805,12 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
}
else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
/* Emit the register index(es) */
- if (index2d ||
- operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
- emit_dword(emit, reg->Dimension.Index);
- }
+ if (index2d) {
+ emit_dword(emit, index2);
- if (tempArrayId > 0) {
- emit_dword(emit, tempArrayId);
+ if (indirect2d) {
+ emit_indirect_register(emit, reg->DimIndirect.Index);
+ }
}
emit_dword(emit, remap_temp_index(emit, file, index));
@@ -1271,12 +1926,34 @@ emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
/**
- * Emit the token for a VGPU10 opcode.
+ * Emit tokens for the "stream" register used by the
+ * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
+ */
+static void
+emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
+{
+ VGPU10OperandToken0 operand0;
+
+ /* init */
+ operand0.value = 0;
+
+   /* The stream register takes a single 1-D index (the stream number) */
+ operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
+ operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+ operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+
+ emit_dword(emit, operand0.value);
+ emit_dword(emit, index);
+}
+
+
+/**
+ * Emit the token for a VGPU10 opcode, with precise parameter.
* \param saturate clamp result to [0,1]?
*/
static void
-emit_opcode(struct svga_shader_emitter_v10 *emit,
- VGPU10_OPCODE_TYPE vgpu10_opcode, boolean saturate)
+emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
+ unsigned vgpu10_opcode, boolean saturate, boolean precise)
{
VGPU10OpcodeToken0 token0;
@@ -1285,7 +1962,26 @@ emit_opcode(struct svga_shader_emitter_v10 *emit,
token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
token0.saturate = saturate;
+ /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
+ * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
+ */
+ token0.preciseValues = precise && emit->version >= 50;
+
emit_dword(emit, token0.value);
+
+ emit->uses_precise_qualifier |= token0.preciseValues;
+}
+
+
+/**
+ * Emit the token for a VGPU10 opcode.
+ * \param saturate clamp result to [0,1]?
+ */
+static void
+emit_opcode(struct svga_shader_emitter_v10 *emit,
+ unsigned vgpu10_opcode, boolean saturate)
+{
+ emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
}
@@ -1695,6 +2391,32 @@ find_immediate(struct svga_shader_emitter_v10 *emit,
/**
+ * As above, but search for a double[2] pair.
+ */
+static int
+find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
+ double x, double y)
+{
+ const unsigned endIndex = emit->num_immediates;
+ unsigned i;
+
+ assert(emit->immediates_emitted);
+
+   /* Search immediates for the x, y pair */
+ for (i = 0; i < endIndex; i++) {
+ if (x == emit->immediates_dbl[i][0] &&
+ y == emit->immediates_dbl[i][1]) {
+ return i;
+ }
+ }
+ /* Should never try to use an immediate value that wasn't pre-declared */
+ assert(!"find_immediate_dbl() failed!");
+ return -1;
+}
+
+
+
+/**
* Return a tgsi_full_src_register for an immediate/literal
* union tgsi_immediate_data[4] value.
* Note: the values must have been previously declared/allocated in
@@ -1831,6 +2553,26 @@ make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
}
+static struct tgsi_full_src_register
+make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
+{
+ struct tgsi_full_src_register reg;
+ int immpos = find_immediate_dbl(emit, value, value);
+
+ assert(immpos >= 0);
+
+ memset(&reg, 0, sizeof(reg));
+ reg.Register.File = TGSI_FILE_IMMEDIATE;
+ reg.Register.Index = immpos;
+ reg.Register.SwizzleX = TGSI_SWIZZLE_X;
+ reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
+ reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ reg.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+ return reg;
+}
+
+
/**
* Allocate space for a union tgsi_immediate_data[4] immediate.
* \return the index/position of the immediate.
@@ -1884,6 +2626,20 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
}
+static unsigned
+alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
+ double x, double y)
+{
+ unsigned n = emit->num_immediates++;
+ assert(!emit->immediates_emitted);
+ assert(n < ARRAY_SIZE(emit->immediates));
+ emit->immediates_dbl[n][0] = x;
+ emit->immediates_dbl[n][1] = y;
+   return n;
+}
+
+
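The double-precision paths above reuse the regular immediate storage: each four-component immediate slot is reinterpreted as a double[2] pair through the immediates_dbl alias. A small standalone sketch of that aliasing, with illustrative names and none of the driver's bookkeeping:

/* Illustrative only: one immediate slot holds either four 32-bit values
 * or two doubles, so a double view can alias the same storage.
 */
union imm_slot {
   unsigned u32[4];
   float    f32[4];
   double   f64[2];
};

static unsigned
alloc_double2(union imm_slot *slots, unsigned *count, double x, double y)
{
   unsigned n = (*count)++;
   slots[n].f64[0] = x;
   slots[n].f64[1] = y;
   return n;
}

static int
find_double2(const union imm_slot *slots, unsigned count, double x, double y)
{
   for (unsigned i = 0; i < count; i++) {
      if (slots[i].f64[0] == x && slots[i].f64[1] == y)
         return (int) i;
   }
   return -1;   /* the value should have been pre-declared */
}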
/**
* Allocate a shader input to store a system value.
*/
@@ -2057,8 +2813,39 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
emit->gs.max_out_vertices = prop->u[0].Data;
break;
- default:
+ case TGSI_PROPERTY_GS_INVOCATIONS:
+ emit->gs.invocations = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+ case TGSI_PROPERTY_NEXT_SHADER:
+ case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
+ /* no-op */
break;
+
+ case TGSI_PROPERTY_TCS_VERTICES_OUT:
+ emit->tcs.vertices_out = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_TES_PRIM_MODE:
+ emit->tes.prim_mode = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_TES_SPACING:
+ emit->tes.spacing = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
+ emit->tes.vertices_order_cw = prop->u[0].Data;
+ break;
+
+ case TGSI_PROPERTY_TES_POINT_MODE:
+ emit->tes.point_mode = prop->u[0].Data;
+ break;
+
+ default:
+ debug_printf("Unexpected TGSI property %s\n",
+ tgsi_property_names[prop->Property.PropertyName]);
}
return TRUE;
@@ -2094,16 +2881,498 @@ emit_property_instructions(struct svga_shader_emitter_v10 *emit)
opcode0.primitive = emit->gs.prim_type;
emit_property_instruction(emit, opcode0, 0, 0);
- /* emit output primitive topology declaration */
- opcode0.value = 0;
- opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
- opcode0.primitiveTopology = emit->gs.prim_topology;
- emit_property_instruction(emit, opcode0, 0, 0);
-
/* emit max output vertices */
opcode0.value = 0;
opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
+
+ if (emit->version >= 50 && emit->gs.invocations > 0) {
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
+ emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
+ }
+}
+
+
+/**
+ * A helper function to declare tessellator domain in a hull shader or
+ * in the domain shader.
+ */
+static void
+emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
+ enum pipe_prim_type prim_mode)
+{
+ VGPU10OpcodeToken0 opcode0;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
+ switch (prim_mode) {
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_LINES:
+ opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
+ break;
+ default:
+ debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
+ opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
+ }
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+}
+
+
+/**
+ * Emit domain shader declarations.
+ */
+static void
+emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
+
+ assert(emit->unit == PIPE_SHADER_TESS_EVAL);
+
+ /* Emit the input control point count */
+ assert(emit->key.tes.vertices_per_patch > 0 &&
+ emit->key.tes.vertices_per_patch <= 32);
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
+ opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ emit_tessellator_domain(emit, emit->tes.prim_mode);
+}
+
+
+/**
+ * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
+ * to implement some instructions. We pre-allocate those values here
+ * in the immediate constant buffer.
+ */
+static void
+alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned n = 0;
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+
+ if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
+ }
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 0, 1, 0, -1);
+
+ if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
+ emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 31, 0, 0, 0);
+ }
+
+ if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
+ emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
+ emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 32, 0, 0, 0);
+ }
+
+ if (emit->key.vs.attrib_puint_to_snorm) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
+ }
+
+ if (emit->key.vs.attrib_puint_to_uscaled) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+ }
+
+ if (emit->key.vs.attrib_puint_to_sscaled) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 22, 12, 2, 0);
+
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, 22, 30, 0, 0);
+ }
+
+ if (emit->vposition.num_prescale > 1) {
+ unsigned i;
+ for (i = 0; i < emit->vposition.num_prescale; i+=4) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_int4(emit, i, i+1, i+2, i+3);
+ }
+ }
+
+ emit->immediates_dbl = (double (*)[2]) emit->immediates;
+
+ if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_double2(emit, -1.0, -1.0);
+ }
+
+ if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_double2(emit, 0.0, 0.0);
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_double2(emit, 1.0, 1.0);
+ }
+
+ if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
+ }
+
+ assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
+
+ unsigned i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (emit->key.tex[i].texel_bias) {
+ /* Replace 0.0f if more immediate float value is needed */
+         /* The trailing 0.0f components can be reused if more immediate
+          * float values are needed. */
+ alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
+ break;
+ }
+ }
+
+ assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
+ emit->num_common_immediates = n;
+}
+
+
+/**
+ * Emit hull shader declarations.
+ */
+static void
+emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
+
+ /* Emit the input control point count */
+ assert(emit->key.tcs.vertices_per_patch > 0 &&
+ emit->key.tcs.vertices_per_patch <= 32);
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
+ opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Emit the output control point count */
+ assert(emit->tcs.vertices_out >= 0 && emit->tcs.vertices_out <= 32);
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
+ opcode0.controlPointCount = emit->tcs.vertices_out;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Emit tessellator domain */
+ emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
+
+ /* Emit tessellator output primitive */
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
+ if (emit->key.tcs.point_mode) {
+ opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
+ }
+ else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
+ opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
+ }
+ else {
+ assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
+ emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
+
+ if (emit->key.tcs.vertices_order_cw)
+ opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
+ else
+ opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
+ }
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Emit tessellator partitioning */
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
+ switch (emit->key.tcs.spacing) {
+ case PIPE_TESS_SPACING_FRACTIONAL_ODD:
+ opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
+ break;
+ case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
+ opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
+ break;
+ case PIPE_TESS_SPACING_EQUAL:
+ opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
+ break;
+ default:
+ debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
+ opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
+ }
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Declare constant registers */
+ emit_constant_declaration(emit);
+
+ /* Declare samplers and resources */
+ emit_sampler_declarations(emit);
+ emit_resource_declarations(emit);
+
+ alloc_common_immediates(emit);
+
+ int nVertices = emit->key.tcs.vertices_per_patch;
+ emit->tcs.imm_index =
+ alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
+
+ /* Now, emit the constant block containing all the immediates
+ * declared by shader, as well as the extra ones seen above.
+ */
+   emit_vgpu10_immediates_block(emit);
+}
+
+
+/**
+ * A helper function to determine if a control point phase is needed.
+ * Returns TRUE if the shader writes control point outputs or if the
+ * output control point count differs from the input count.
+ */
+static boolean
+needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+
+ assert(emit->unit == PIPE_SHADER_TESS_CTRL);
+
+ /* If output control point count does not match the input count,
+ * we need a control point phase to explicitly set the output control
+ * points.
+ */
+ if (emit->key.tcs.vertices_per_patch != emit->tcs.vertices_out)
+ return TRUE;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ switch (emit->info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_PATCH:
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ break;
+ default:
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
+/**
+ * Start the hull shader control point phase
+ */
+static boolean
+emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
+
+ /* If there is no control point output, skip the control point phase. */
+ if (!needs_control_point_phase(emit))
+ return FALSE;
+
+ /* Start the control point phase in the hull shader */
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Declare the output control point ID */
+ if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
+ /* Add invocation id declaration if it does not exist */
+ emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
+ }
+
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
+ VGPU10_OPERAND_INDEX_0D,
+ 0, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_0_COMPONENT, 0,
+ 0,
+ VGPU10_INTERPOLATION_CONSTANT, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+
+ if (emit->tcs.prim_id_index != INVALID_INDEX) {
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
+ VGPU10_OPERAND_INDEX_0D,
+ 0, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_0_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ 0,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Start the hull shader patch constant phase and
+ * do the second pass of the tcs translation and emit
+ * the relevant declarations and instructions for this phase.
+ */
+static boolean
+emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
+ struct tgsi_parse_context *parse)
+{
+ unsigned inst_number = 0;
+ boolean ret = TRUE;
+ VGPU10OpcodeToken0 opcode0;
+
+ emit->skip_instruction = FALSE;
+
+ /* Start the patch constant phase */
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ end_emit_instruction(emit);
+
+ /* Set the current phase to patch constant phase */
+ emit->tcs.control_point_phase = FALSE;
+
+ if (emit->tcs.prim_id_index != INVALID_INDEX) {
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
+ VGPU10_OPERAND_INDEX_0D,
+ 0, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_0_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ 0,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
+ }
+
+ /* Emit declarations for this phase */
+ emit->index_range.required =
+ emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
+ emit_tcs_input_declarations(emit);
+
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+
+ emit->index_range.required =
+ emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
+ emit_tcs_output_declarations(emit);
+
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+ emit->index_range.required = FALSE;
+
+ emit_temporaries_declaration(emit);
+
+ /* Reset the token position to the first instruction token
+ * in preparation for the second pass of the shader
+ */
+ parse->Position = emit->tcs.instruction_token_pos;
+
+ while (!tgsi_parse_end_of_tokens(parse)) {
+ tgsi_parse_token(parse);
+
+ assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ ret = emit_vgpu10_instruction(emit, inst_number++,
+ &parse->FullToken.FullInstruction);
+
+      /* Usually this applies to TCS only. If the shader reads patch
+       * constant outputs in the fork phase, we reemit every instruction
+       * that writes a patch constant output in the fork phase so its
+       * results are also stored into temporaries.
+       */
+ if (emit->reemit_instruction) {
+ assert(emit->unit == PIPE_SHADER_TESS_CTRL);
+ ret = emit_vgpu10_instruction(emit, inst_number,
+ &parse->FullToken.FullInstruction);
+ }
+
+ if (!ret)
+ return FALSE;
+ }
+
+ return TRUE;
+}
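The fork phase is produced by a second pass over the same instruction tokens, restarting from the saved instruction_token_pos. A stripped-down sketch of that rewind-and-reemit pattern; the cursor type and callback are illustrative, not the TGSI parser API:

/* Illustrative only: run the same instruction stream twice, once per
 * hull shader phase, by saving and restoring the read position.
 */
struct token_cursor {
   const unsigned *tokens;
   unsigned count;
};

static void
emit_two_phases(const struct token_cursor *c, unsigned first_inst_pos,
                void (*emit_inst)(unsigned token, int phase))
{
   unsigned pos;

   /* first pass: control point phase */
   for (pos = first_inst_pos; pos < c->count; pos++)
      emit_inst(c->tokens[pos], 0);

   /* second pass: rewind and emit the patch constant phase */
   for (pos = first_inst_pos; pos < c->count; pos++)
      emit_inst(c->tokens[pos], 1);
}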
+
+
+/**
+ * Emit index range declaration.
+ */
+static boolean
+emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
+{
+ if (emit->version < 50)
+ return TRUE;
+
+ assert(emit->index_range.start_index != INVALID_INDEX);
+ assert(emit->index_range.count != 0);
+ assert(emit->index_range.required);
+ assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
+ assert(emit->index_range.dim != 0);
+ assert(emit->index_range.size != 0);
+
+ VGPU10OpcodeToken0 opcode0;
+ VGPU10OperandToken0 operand0;
+
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
+
+ operand0.value = 0;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.indexDimension = emit->index_range.dim;
+ operand0.operandType = emit->index_range.operandType;
+ operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+ operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
+ operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ begin_emit_instruction(emit);
+ emit_dword(emit, opcode0.value);
+ emit_dword(emit, operand0.value);
+
+ if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
+ emit_dword(emit, emit->index_range.size);
+ emit_dword(emit, emit->index_range.start_index);
+ emit_dword(emit, emit->index_range.count);
+ }
+ else {
+ emit_dword(emit, emit->index_range.start_index);
+ emit_dword(emit, emit->index_range.count);
+ }
+
+ end_emit_instruction(emit);
+
+ /* Reset fields in emit->index_range struct except
+ * emit->index_range.required which will be reset afterwards
+ */
+ emit->index_range.count = 0;
+ emit->index_range.operandType = VGPU10_NUM_OPERANDS;
+ emit->index_range.start_index = INVALID_INDEX;
+ emit->index_range.size = 0;
+ emit->index_range.dim = 0;
+
+ return TRUE;
}
@@ -2123,8 +3392,14 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
{
assert(opcode0.opcodeType);
assert(operand0.mask ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
(operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
- (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK));
+ (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
+ (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
begin_emit_instruction(emit);
emit_dword(emit, opcode0.value);
@@ -2159,7 +3434,8 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
* \param index the input register index
* \param size array size of the operand. In most cases, it is 1,
* but for inputs to geometry shader, the array size varies
- * depending on the primitive type.
+ *              depending on the primitive type. For a tessellation control
+ *              shader, the array size is the vertex count per patch.
* \param name one of VGPU10_NAME_x
* \parma numComp number of components
* \param selMode component selection mode
@@ -2176,7 +3452,9 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
VGPU10_OPERAND_NUM_COMPONENTS numComp,
VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
unsigned usageMask,
- VGPU10_INTERPOLATION_MODE interpMode)
+ VGPU10_INTERPOLATION_MODE interpMode,
+ boolean addSignature,
+ SVGA3dDXSignatureSemanticName sgnName)
{
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
@@ -2185,11 +3463,22 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
+ opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
- operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
+ operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
+ operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
+
assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
assert(dim <= VGPU10_OPERAND_INDEX_3D);
@@ -2199,7 +3488,9 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
name == VGPU10_NAME_VERTEX_ID ||
name == VGPU10_NAME_PRIMITIVE_ID ||
name == VGPU10_NAME_IS_FRONT_FACE ||
- name == VGPU10_NAME_SAMPLE_INDEX);
+ name == VGPU10_NAME_SAMPLE_INDEX ||
+ name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
+ name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
interpMode == VGPU10_INTERPOLATION_CONSTANT ||
@@ -2229,6 +3520,78 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
name_token.name = name;
emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
+
+ if (addSignature) {
+ struct svga_shader_signature *sgn = &emit->signature;
+ if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
+ /* Set patch constant signature */
+ SVGA3dDXShaderSignatureEntry *sgnEntry =
+ &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
+ set_shader_signature_entry(sgnEntry, index,
+ sgnName, usageMask,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+
+ } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
+ /* Set input signature */
+ SVGA3dDXShaderSignatureEntry *sgnEntry =
+ &sgn->inputs[sgn->header.numInputSignatures++];
+ set_shader_signature_entry(sgnEntry, index,
+ sgnName, usageMask,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+ }
+ }
+
+ if (emit->index_range.required) {
+ /* Here, index_range declaration is only applicable for opcodeType
+ * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
+ * for operandType VGPU10_OPERAND_TYPE_INPUT,
+ * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
+ * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
+ */
+ if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
+ opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
+ (operandType != VGPU10_OPERAND_TYPE_INPUT &&
+ operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
+ operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+ return;
+ }
+
+ if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
+         /* Need to record a new index_range */
+ emit->index_range.count = 1;
+ emit->index_range.operandType = operandType;
+ emit->index_range.start_index = index;
+ emit->index_range.size = size;
+ emit->index_range.dim = dim;
+ }
+ else if (index !=
+ (emit->index_range.start_index + emit->index_range.count) ||
+ emit->index_range.operandType != operandType) {
+ /* Input index is not contiguous with index range or operandType is
+ * different from index range's operandType. We need to emit current
+ * index_range first and then start recording next index range.
+ */
+ emit_index_range_declaration(emit);
+
+ emit->index_range.count = 1;
+ emit->index_range.operandType = operandType;
+ emit->index_range.start_index = index;
+ emit->index_range.size = size;
+ emit->index_range.dim = dim;
+ }
+ else if (emit->index_range.operandType == operandType) {
+ /* Since input index is contiguous with index range and operandType
+ * is same as index range's operandType, increment index range count.
+ */
+ emit->index_range.count++;
+ }
+ }
}
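The addSignature path above records one signature entry per declared register so that stages can be matched by semantic name rather than by raw register slot. A minimal sketch of that bookkeeping with illustrative types; the real entries also carry the component type and minimum precision:

/* Illustrative only: append a (register, semantic, mask) entry to an
 * input signature table as each input declaration is emitted.
 */
struct sig_entry {
   unsigned reg;
   unsigned semantic;
   unsigned mask;
};

struct signature {
   struct sig_entry inputs[32];
   unsigned num_inputs;
};

static void
add_input_signature(struct signature *sig, unsigned reg,
                    unsigned semantic, unsigned mask)
{
   struct sig_entry *e = &sig->inputs[sig->num_inputs++];
   e->reg = reg;
   e->semantic = semantic;
   e->mask = mask;
}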
@@ -2243,13 +3606,15 @@ static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
VGPU10_OPCODE_TYPE type, unsigned index,
VGPU10_SYSTEM_NAME name,
- unsigned usageMask)
+ unsigned writemask,
+ boolean addSignature,
+ SVGA3dDXSignatureSemanticName sgnName)
{
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
VGPU10NameToken name_token;
- assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+ assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
@@ -2257,6 +3622,7 @@ emit_output_declaration(struct svga_shader_emitter_v10 *emit,
name == VGPU10_NAME_POSITION ||
name == VGPU10_NAME_PRIMITIVE_ID ||
name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
+ name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
name == VGPU10_NAME_CLIP_DISTANCE);
check_register_index(emit, type, index);
@@ -2267,13 +3633,66 @@ emit_output_declaration(struct svga_shader_emitter_v10 *emit,
operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
- operand0.mask = usageMask;
+ operand0.mask = writemask;
operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
name_token.name = name;
emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
+
+ /* Capture output signature */
+ if (addSignature) {
+ struct svga_shader_signature *sgn = &emit->signature;
+ SVGA3dDXShaderSignatureEntry *sgnEntry =
+ &sgn->outputs[sgn->header.numOutputSignatures++];
+ set_shader_signature_entry(sgnEntry, index,
+ sgnName, writemask,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+ }
+
+ if (emit->index_range.required) {
+ /* Here, index_range declaration is only applicable for opcodeType
+ * VGPU10_OPCODE_DCL_OUTPUT and for operandType
+ * VGPU10_OPERAND_TYPE_OUTPUT.
+ */
+ if (type != VGPU10_OPCODE_DCL_OUTPUT) {
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+ return;
+ }
+
+ if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
+         /* Need to record a new index_range */
+ emit->index_range.count = 1;
+ emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
+ emit->index_range.start_index = index;
+ emit->index_range.size = 1;
+ emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
+ }
+ else if (index !=
+ (emit->index_range.start_index + emit->index_range.count)) {
+ /* Output index is not contiguous with index range. We need to
+ * emit current index_range first and then start recording next
+ * index range.
+ */
+ emit_index_range_declaration(emit);
+
+ emit->index_range.count = 1;
+ emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
+ emit->index_range.start_index = index;
+ emit->index_range.size = 1;
+ emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
+ }
+ else {
+ /* Since output index is contiguous with index range, increment
+ * index range count.
+ */
+ emit->index_range.count++;
+ }
+ }
}
@@ -2327,6 +3746,563 @@ emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
/**
+ * Emit output declarations for fragment shader.
+ */
+static void
+emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned int i;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+ const enum tgsi_semantic semantic_name =
+ emit->info.output_semantic_name[i];
+ const unsigned semantic_index = emit->info.output_semantic_index[i];
+ unsigned index = i;
+
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
+
+ emit->fs.color_out_index[semantic_index] = index;
+
+ emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
+ index + 1);
+
+ /* The semantic index is the shader's color output/buffer index */
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+
+ if (semantic_index == 0) {
+ if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+ /* Emit declarations for the additional color outputs
+ * for broadcasting.
+ */
+ unsigned j;
+ for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
+ /* Allocate a new output index */
+ unsigned idx = emit->info.num_outputs + j - 1;
+ emit->fs.color_out_index[j] = idx;
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT, idx,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ emit->info.output_semantic_index[idx] = j;
+ }
+
+ emit->fs.num_color_outputs =
+ emit->key.fs.write_color0_to_n_cbufs;
+ }
+ }
+ else {
+ assert(!emit->key.fs.write_color0_to_n_cbufs);
+ }
+ }
+ else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* Fragment depth output */
+ emit_fragdepth_output_declaration(emit);
+ }
+ else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
+ /* Sample mask output */
+ emit_samplemask_output_declaration(emit);
+ }
+ else {
+ assert(!"Bad output semantic name");
+ }
+ }
+}
+
+
+/**
+ * Emit common output declaration for vertex processing.
+ */
+static void
+emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
+ unsigned index, unsigned writemask,
+ boolean addSignature)
+{
+ const enum tgsi_semantic semantic_name =
+ emit->info.output_semantic_name[index];
+ const unsigned semantic_index = emit->info.output_semantic_index[index];
+ unsigned name, type;
+ unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+ assert(emit->unit != PIPE_SHADER_FRAGMENT &&
+ emit->unit != PIPE_SHADER_COMPUTE);
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_POSITION:
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ /* position will be declared in control point only */
+ assert(emit->tcs.control_point_phase);
+ type = VGPU10_OPCODE_DCL_OUTPUT;
+ name = VGPU10_NAME_UNDEFINED;
+ emit_output_declaration(emit, type, index, name, final_mask, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+ return;
+ }
+ else {
+ type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+ name = VGPU10_NAME_POSITION;
+ }
+ /* Save the index of the vertex position output register */
+ emit->vposition.out_index = index;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
+ name = VGPU10_NAME_CLIP_DISTANCE;
+ /* save the starting index of the clip distance output register */
+ if (semantic_index == 0)
+ emit->clip_dist_out_index = index;
+ final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
+ if (final_mask == 0x0)
+ return; /* discard this do-nothing declaration */
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ type = VGPU10_OPCODE_DCL_OUTPUT;
+ name = VGPU10_NAME_UNDEFINED;
+ emit->clip_vertex_out_index = index;
+ break;
+ default:
+ /* generic output */
+ type = VGPU10_OPCODE_DCL_OUTPUT;
+ name = VGPU10_NAME_UNDEFINED;
+ }
+
+ emit_output_declaration(emit, type, index, name, final_mask, addSignature,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+}
+
+
+/**
+ * Emit declaration for outputs in vertex shader.
+ */
+static void
+emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
+ }
+}
+
+
+/**
+ * A helper function to determine the writemask for an output
+ * for the specified stream.
+ */
+static unsigned
+output_writemask_for_stream(unsigned stream, ubyte output_streams,
+ ubyte output_usagemask)
+{
+ unsigned i;
+ unsigned writemask = 0;
+
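+   /* output_streams packs a 2-bit stream index for each of the 4 components */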
+ for (i = 0; i < 4; i++) {
+ if ((output_streams & 0x3) == stream)
+ writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
+ output_streams >>= 2;
+ }
+ return writemask & output_usagemask;
+}
+
+
+/**
+ * Emit declaration for outputs in geometry shader.
+ */
+static void
+emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+ VGPU10OpcodeToken0 opcode0;
+ unsigned numStreamsSupported = 1;
+ int s;
+
+ if (emit->version >= 50) {
+ numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
+ }
+
+ /**
+    * Start emitting from the last stream and work down to stream 0,
+    * so that any auxiliary output declarations end up in stream 0.
+ */
+ for (s = numStreamsSupported-1; s >= 0; s--) {
+
+ if (emit->info.num_stream_output_components[s] == 0)
+ continue;
+
+ if (emit->version >= 50) {
+ /* DCL_STREAM stream */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
+ emit_stream_register(emit, s);
+ end_emit_instruction(emit);
+ }
+
+ /* emit output primitive topology declaration */
+ opcode0.value = 0;
+ opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
+ opcode0.primitiveTopology = emit->gs.prim_topology;
+ emit_property_instruction(emit, opcode0, 0, 0);
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ unsigned writemask;
+
+ /* find out the writemask for this stream */
+ writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
+ emit->output_usage_mask[i]);
+
+ if (writemask) {
+ enum tgsi_semantic semantic_name =
+ emit->info.output_semantic_name[i];
+
+ /* TODO: Still need to take care of a special case where a
+ * single varying spans across multiple output registers.
+ */
+ switch(semantic_name) {
+ case TGSI_SEMANTIC_PRIMID:
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
+ VGPU10_NAME_PRIMITIVE_ID,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ FALSE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
+ VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
+ VGPU10_OPERAND_4_COMPONENT_MASK_X,
+ FALSE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ emit_output_declaration(emit,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
+ VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
+ VGPU10_OPERAND_4_COMPONENT_MASK_X,
+ FALSE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ emit->gs.viewport_index_out_index = i;
+ break;
+ default:
+ emit_vertex_output_declaration(emit, i, writemask, FALSE);
+ }
+ }
+ }
+ }
+
+   /* For geometry shader outputs, it is possible that the same register is
+    * declared multiple times for different streams. To avoid redundant
+    * signature entries, the geometry shader output signature is emitted
+    * outside of the declarations.
+ */
+ struct svga_shader_signature *sgn = &emit->signature;
+ SVGA3dDXShaderSignatureEntry *sgnEntry;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ if (emit->output_usage_mask[i]) {
+ enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
+
+ sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
+ set_shader_signature_entry(sgnEntry, i,
+ map_tgsi_semantic_to_sgn_name(sem_name),
+ emit->output_usage_mask[i],
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+ }
+ }
+}
+
+
+/**
+ * Emit the declaration for the tess inner/outer output.
+ * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
+ * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
+ * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
+ */
+static void
+emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
+ unsigned index, unsigned opcodeType,
+ unsigned operandType, VGPU10_SYSTEM_NAME name,
+ SVGA3dDXSignatureSemanticName sgnName)
+{
+ VGPU10OpcodeToken0 opcode0;
+ VGPU10OperandToken0 operand0;
+ VGPU10NameToken name_token;
+
+ assert(emit->version >= 50);
+ assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
+ (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
+ name == VGPU10_NAME_UNDEFINED));
+ assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
+
+ assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
+ operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
+
+ opcode0.value = operand0.value = name_token.value = 0;
+
+ opcode0.opcodeType = opcodeType;
+ operand0.operandType = operandType;
+ operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+ operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
+ operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+ operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
+
+ name_token.name = name;
+ emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
+
+ /* Capture patch constant signature */
+ struct svga_shader_signature *sgn = &emit->signature;
+ SVGA3dDXShaderSignatureEntry *sgnEntry =
+ &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
+ set_shader_signature_entry(sgnEntry, index,
+ sgnName, SVGA3DWRITEMASK_0,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+}
+
+
+/**
+ * Emit output declarations for tessellation control shader.
+ */
+static void
+emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned int i;
+ unsigned outputIndex = emit->num_outputs;
+ struct svga_shader_signature *sgn = &emit->signature;
+
+ /**
+ * Initialize patch_generic_out_count so it won't be counted twice
+    * since this function is called twice, once for the control point phase
+    * and again for the patch constant phase.
+ */
+ emit->tcs.patch_generic_out_count = 0;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ unsigned index = i;
+ const enum tgsi_semantic semantic_name =
+ emit->info.output_semantic_name[i];
+
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_TESSINNER:
+ emit->tcs.inner.tgsi_index = i;
+
+ /* skip per-patch output declarations in control point phase */
+ if (emit->tcs.control_point_phase)
+ break;
+
+ emit->tcs.inner.out_index = outputIndex;
+ switch (emit->key.tcs.prim_mode) {
+ case PIPE_PRIM_QUADS:
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
+
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
+ break;
+ case PIPE_PRIM_LINES:
+ break;
+ default:
+ debug_printf("Unsupported primitive type");
+ }
+ break;
+
+ case TGSI_SEMANTIC_TESSOUTER:
+ emit->tcs.outer.tgsi_index = i;
+
+ /* skip per-patch output declarations in control point phase */
+ if (emit->tcs.control_point_phase)
+ break;
+
+ emit->tcs.outer.out_index = outputIndex;
+ switch (emit->key.tcs.prim_mode) {
+ case PIPE_PRIM_QUADS:
+ for (int j = 0; j < 4; j++) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
+ }
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ for (int j = 0; j < 3; j++) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
+ }
+ break;
+ case PIPE_PRIM_LINES:
+ for (int j = 0; j < 2; j++) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
+ }
+ break;
+ default:
+ debug_printf("Unsupported primitive type");
+ }
+ break;
+
+ case TGSI_SEMANTIC_PATCH:
+ if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
+            emit->tcs.patch_generic_out_index = i;
+ emit->tcs.patch_generic_out_count++;
+
+ /* skip per-patch output declarations in control point phase */
+ if (emit->tcs.control_point_phase)
+ break;
+
+ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ FALSE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+
+ SVGA3dDXShaderSignatureEntry *sgnEntry =
+ &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
+ set_shader_signature_entry(sgnEntry, index,
+ map_tgsi_semantic_to_sgn_name(semantic_name),
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+
+ break;
+
+ default:
+ /* save the starting index of control point outputs */
+ if (emit->tcs.control_point_out_index == INVALID_INDEX)
+ emit->tcs.control_point_out_index = i;
+ emit->tcs.control_point_out_count++;
+
+ /* skip control point output declarations in patch constant phase */
+ if (!emit->tcs.control_point_phase)
+ break;
+
+ emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
+ TRUE);
+
+ }
+ }
+
+ if (emit->tcs.control_point_phase) {
+ /**
+ * Add missing control point output in control point phase.
+ */
+ if (emit->tcs.control_point_out_index == INVALID_INDEX) {
+ /* use register index after tessellation factors */
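+         /* quads use 6 tess factor registers, triangles 4, isolines 2 */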
+ switch (emit->key.tcs.prim_mode) {
+ case PIPE_PRIM_QUADS:
+ emit->tcs.control_point_out_index = outputIndex + 6;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ emit->tcs.control_point_out_index = outputIndex + 4;
+ break;
+ default:
+ emit->tcs.control_point_out_index = outputIndex + 2;
+ break;
+ }
+ emit->tcs.control_point_out_count++;
+ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
+ emit->tcs.control_point_out_index,
+ VGPU10_NAME_POSITION,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
+
+         /* If the TCS does not write any control point outputs,
+ * we can end the hull shader control point phase here
+ * after emitting the default control point output.
+ */
+ emit->skip_instruction = TRUE;
+ }
+ }
+ else {
+ if (emit->tcs.outer.out_index == INVALID_INDEX) {
+         /* Since the TCS did not declare the outer tess level output register,
+          * we declare it here for the patch constant phase only.
+ */
+ emit->tcs.outer.out_index = outputIndex;
+ if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
+ for (int i = 0; i < 4; i++) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
+ }
+ }
+ else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
+ for (int i = 0; i < 3; i++) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
+ }
+ }
+ }
+
+ if (emit->tcs.inner.out_index == INVALID_INDEX) {
+         /* Since the TCS did not declare the inner tess level output register,
+          * we declare it here.
+ */
+ emit->tcs.inner.out_index = outputIndex;
+ if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
+ }
+ else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
+ emit_tesslevel_declaration(emit, outputIndex++,
+ VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
+ VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
+ }
+ }
+ }
+ emit->num_outputs = outputIndex;
+}
+
+
+/**
+ * Emit output declarations for tessellation evaluation shader.
+ */
+static void
+emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned int i;
+
+ for (i = 0; i < emit->info.num_outputs; i++) {
+ emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
+ }
+}
+
+
+/**
* Emit the declaration for a system value input/output.
*/
static void
@@ -2344,7 +4320,8 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
VGPU10_OPERAND_4_COMPONENT,
VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
VGPU10_OPERAND_4_COMPONENT_MASK_X,
- VGPU10_INTERPOLATION_UNDEFINED);
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
break;
case TGSI_SEMANTIC_VERTEXID:
index = alloc_system_value_index(emit, index);
@@ -2356,7 +4333,8 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
VGPU10_OPERAND_4_COMPONENT,
VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
VGPU10_OPERAND_4_COMPONENT_MASK_X,
- VGPU10_INTERPOLATION_UNDEFINED);
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
break;
case TGSI_SEMANTIC_SAMPLEID:
assert(emit->unit == PIPE_SHADER_FRAGMENT);
@@ -2370,7 +4348,8 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
VGPU10_OPERAND_4_COMPONENT,
VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
VGPU10_OPERAND_4_COMPONENT_MASK_X,
- VGPU10_INTERPOLATION_CONSTANT);
+ VGPU10_INTERPOLATION_CONSTANT, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
break;
case TGSI_SEMANTIC_SAMPLEPOS:
/* This system value contains the position of the current sample
@@ -2382,9 +4361,118 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
emit->fs.sample_pos_sys_index = index;
index = alloc_system_value_index(emit, index);
break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+      /* Note: the invocation id input is mapped to a different register
+       * depending on the shader type. In GS, it will be mapped to
+       * vGSInstanceID#. In TCS, it will be mapped to vOutputControlPointID#.
+       * In both cases the mapped name is unique rather than just a generic
+       * input name ("v#"), so there is no need to remap the index value.
+ */
+ assert(emit->unit == PIPE_SHADER_GEOMETRY ||
+ emit->unit == PIPE_SHADER_TESS_CTRL);
+ assert(emit->version >= 50);
+
+ if (emit->unit == PIPE_SHADER_GEOMETRY) {
+ emit->gs.invocation_id_sys_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_0_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ 0,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+ } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ /* The emission of the control point id will be done
+ * in the control point phase in emit_hull_shader_control_point_phase().
+ */
+ emit->tcs.invocation_id_sys_index = index;
+ }
+ break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ /* Note: the PS sample mask input has a unique name ("vCoverage#")
+ * rather than just a generic input name ("v#") so no need to remap the
+ * index value.
+ */
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
+ assert(emit->version >= 50);
+ emit->fs.sample_mask_in_sys_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_1_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ 0,
+ VGPU10_INTERPOLATION_CONSTANT, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+ break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ assert(emit->version >= 50);
+
+ unsigned usageMask = 0;
+
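+      /* The domain point has three coordinates (u,v,w) for triangles and
+       * two coordinates (u,v) for lines and quads.
+       */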
+ if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
+ usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
+ }
+ else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
+ emit->tes.prim_mode == PIPE_PRIM_QUADS) {
+ usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
+ }
+
+ emit->tes.tesscoord_sys_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ usageMask,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+ break;
+ case TGSI_SEMANTIC_TESSINNER:
+ assert(emit->version >= 50);
+ emit->tes.inner.tgsi_index = index;
+ break;
+ case TGSI_SEMANTIC_TESSOUTER:
+ assert(emit->version >= 50);
+ emit->tes.outer.tgsi_index = index;
+ break;
+ case TGSI_SEMANTIC_VERTICESIN:
+ assert(emit->unit == PIPE_SHADER_TESS_CTRL);
+ assert(emit->version >= 50);
+
+ /* save the system value index */
+ emit->tcs.vertices_per_patch_index = index;
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ assert(emit->version >= 50);
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ emit->tcs.prim_id_index = index;
+ }
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ emit->tes.prim_id_index = index;
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
+ VGPU10_OPERAND_INDEX_0D,
+ index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_0_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ 0,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ }
+ break;
default:
- debug_printf("unexpected sytem value semantic index %u\n",
- semantic_name);
+ debug_printf("unexpected system value semantic index %u / %s\n",
+ semantic_name, tgsi_semantic_names[semantic_name]);
}
}
@@ -2414,24 +4502,12 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
* and the size of the array.
*/
const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
- unsigned i;
-
assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
/* Save this array so we can emit the declaration for it later */
- emit->temp_arrays[arrayID].start = decl->Range.First;
- emit->temp_arrays[arrayID].size =
- decl->Range.Last - decl->Range.First + 1;
-
- emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
- assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
- emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
-
- /* Fill in the temp_map entries for this array */
- for (i = decl->Range.First; i <= decl->Range.Last; i++) {
- emit->temp_map[i].arrayId = arrayID;
- emit->temp_map[i].index = i - decl->Range.First;
- }
+ create_temp_array(emit, arrayID, decl->Range.First,
+ decl->Range.Last - decl->Range.First + 1,
+ decl->Range.First);
}
/* for all temps, indexed or not, keep track of highest index */
@@ -2514,275 +4590,472 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
/**
- * Emit all input declarations.
+ * Emit input declarations for fragment shader.
*/
-static boolean
-emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+static void
+emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
{
unsigned i;
- if (emit->unit == PIPE_SHADER_FRAGMENT) {
-
- for (i = 0; i < emit->linkage.num_inputs; i++) {
- enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
- unsigned usage_mask = emit->info.input_usage_mask[i];
- unsigned index = emit->linkage.input_map[i];
- VGPU10_OPCODE_TYPE type;
- VGPU10_INTERPOLATION_MODE interpolationMode;
- VGPU10_SYSTEM_NAME name;
-
- if (usage_mask == 0)
- continue; /* register is not actually used */
-
- if (semantic_name == TGSI_SEMANTIC_POSITION) {
- /* fragment position input */
- type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
- interpolationMode = VGPU10_INTERPOLATION_LINEAR;
- name = VGPU10_NAME_POSITION;
- if (usage_mask & TGSI_WRITEMASK_W) {
- /* we need to replace use of 'w' with '1/w' */
- emit->fs.fragcoord_input_index = i;
- }
+ for (i = 0; i < emit->linkage.num_inputs; i++) {
+ enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ unsigned type, interpolationMode, name;
+ unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* fragment position input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_LINEAR;
+ name = VGPU10_NAME_POSITION;
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ /* we need to replace use of 'w' with '1/w' */
+ emit->fs.fragcoord_input_index = i;
}
- else if (semantic_name == TGSI_SEMANTIC_FACE) {
- /* fragment front-facing input */
- type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
- interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
- name = VGPU10_NAME_IS_FRONT_FACE;
- emit->fs.face_input_index = i;
- }
- else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
- /* primitive ID */
- type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
- interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
- name = VGPU10_NAME_PRIMITIVE_ID;
- }
- else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
- /* sample index / ID */
+ }
+ else if (semantic_name == TGSI_SEMANTIC_FACE) {
+ /* fragment front-facing input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_IS_FRONT_FACE;
+ emit->fs.face_input_index = i;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+ /* primitive ID */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_PRIMITIVE_ID;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
+ /* sample index / ID */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_SAMPLE_INDEX;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_LAYER) {
+ /* render target array index */
+ if (emit->key.fs.layer_to_zero) {
+ /**
+ * The shader from the previous stage does not write to layer,
+ * so reading the layer index in fragment shader should return 0.
+ */
+ emit->fs.layer_input_index = i;
+ continue;
+ } else {
type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
- name = VGPU10_NAME_SAMPLE_INDEX;
+ name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
+ mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
}
- else {
- /* general fragment input */
- type = VGPU10_OPCODE_DCL_INPUT_PS;
- interpolationMode =
+ }
+ else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
+ /* viewport index */
+ type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+ interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+ name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
+ mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
+ }
+ else {
+ /* general fragment input */
+ type = VGPU10_OPCODE_DCL_INPUT_PS;
+ interpolationMode =
translate_interpolation(emit,
emit->info.input_interpolate[i],
emit->info.input_interpolate_loc[i]);
- /* keeps track if flat interpolation mode is being used */
- emit->uses_flat_interp |=
+ /* keeps track if flat interpolation mode is being used */
+ emit->uses_flat_interp = emit->uses_flat_interp ||
(interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
- name = VGPU10_NAME_UNDEFINED;
- }
-
- emit_input_declaration(emit, type,
- VGPU10_OPERAND_TYPE_INPUT,
- VGPU10_OPERAND_INDEX_1D, index, 1,
- name,
- VGPU10_OPERAND_4_COMPONENT,
- VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
- interpolationMode);
+ name = VGPU10_NAME_UNDEFINED;
}
+
+ emit_input_declaration(emit, type,
+ VGPU10_OPERAND_TYPE_INPUT,
+ VGPU10_OPERAND_INDEX_1D, index, 1,
+ name,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ mask,
+ interpolationMode, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
}
- else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+}
- for (i = 0; i < emit->info.num_inputs; i++) {
- enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
- unsigned usage_mask = emit->info.input_usage_mask[i];
- unsigned index = emit->linkage.input_map[i];
- VGPU10_OPCODE_TYPE opcodeType, operandType;
- VGPU10_OPERAND_NUM_COMPONENTS numComp;
- VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode;
- VGPU10_SYSTEM_NAME name;
- VGPU10_OPERAND_INDEX_DIMENSION dim;
-
- if (usage_mask == 0)
- continue; /* register is not actually used */
-
- opcodeType = VGPU10_OPCODE_DCL_INPUT;
- operandType = VGPU10_OPERAND_TYPE_INPUT;
- numComp = VGPU10_OPERAND_4_COMPONENT;
- selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
- name = VGPU10_NAME_UNDEFINED;
- /* all geometry shader inputs are two dimensional except
- * gl_PrimitiveID
+/**
+ * Emit input declarations for vertex shader.
+ */
+static void
+emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+
+ for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = i;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT,
+ VGPU10_OPERAND_INDEX_1D, index, 1,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
+ }
+}
+
+
+/**
+ * Emit input declarations for geometry shader.
+ */
+static void
+emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ unsigned opcodeType, operandType;
+ unsigned numComp, selMode;
+ unsigned name;
+ unsigned dim;
+
+ if (usage_mask == 0)
+ continue; /* register is not actually used */
+
+ opcodeType = VGPU10_OPCODE_DCL_INPUT;
+ operandType = VGPU10_OPERAND_TYPE_INPUT;
+ numComp = VGPU10_OPERAND_4_COMPONENT;
+ selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+ name = VGPU10_NAME_UNDEFINED;
+
+ /* all geometry shader inputs are two dimensional except
+ * gl_PrimitiveID
+ */
+ dim = VGPU10_OPERAND_INDEX_2D;
+
+ if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+ /* Primitive ID */
+ operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
+ dim = VGPU10_OPERAND_INDEX_0D;
+ numComp = VGPU10_OPERAND_0_COMPONENT;
+ selMode = 0;
+
+ /* also save the register index so we can check for
+          * primitive id when emitting a src register. We need to modify the
+          * operand type and index dimension when emitting the primitive id
+          * src reg.
*/
- dim = VGPU10_OPERAND_INDEX_2D;
+ emit->gs.prim_id_index = i;
+ }
+ else if (semantic_name == TGSI_SEMANTIC_POSITION) {
+ /* vertex position input */
+ opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
+ name = VGPU10_NAME_POSITION;
+ }
- if (semantic_name == TGSI_SEMANTIC_PRIMID) {
- /* Primitive ID */
- operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
- dim = VGPU10_OPERAND_INDEX_0D;
- numComp = VGPU10_OPERAND_0_COMPONENT;
- selMode = 0;
+ emit_input_declaration(emit, opcodeType, operandType,
+ dim, index,
+ emit->gs.input_size,
+ name,
+ numComp, selMode,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+ }
+}
- /* also save the register index so we can check for
- * primitive id when emit src register. We need to modify the
- * operand type, index dimension when emit primitive id src reg.
- */
- emit->gs.prim_id_index = i;
- }
- else if (semantic_name == TGSI_SEMANTIC_POSITION) {
- /* vertex position input */
- opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
- name = VGPU10_NAME_POSITION;
- }
- emit_input_declaration(emit, opcodeType, operandType,
- dim, index,
- emit->gs.input_size,
- name,
- numComp, selMode,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
- VGPU10_INTERPOLATION_UNDEFINED);
+/**
+ * Emit input declarations for tessellation control shader.
+ */
+static void
+emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ unsigned i;
+ unsigned size = emit->key.tcs.vertices_per_patch;
+ unsigned indicesMask = 0;
+
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
+ VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
+ VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
+ boolean addSignature = TRUE;
+
+ /* indices that are declared */
+ indicesMask |= 1 << index;
+
+ if (semantic_name == TGSI_SEMANTIC_POSITION ||
+ index == emit->linkage.position_index) {
+ /* save the input control point index for later use */
+ emit->tcs.control_point_input_index = i;
+ }
+ else if (usage_mask == 0) {
+ continue; /* register is not actually used */
+ }
+
+ /* input control points in the patch constant phase are emitted in the
+ * vicp register rather than the v register.
+ */
+ if (!emit->tcs.control_point_phase) {
+ operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
+ addSignature = emit->tcs.control_point_out_count == 0;
}
+
+ /* Tessellation control shader inputs are two dimensional.
+ * The array size is determined by the patch vertex count.
+ */
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ operandType,
+ VGPU10_OPERAND_INDEX_2D,
+ index, size, name,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED,
+ addSignature,
+ map_tgsi_semantic_to_sgn_name(semantic_name));
+
+ }
+
+ if (emit->tcs.control_point_phase) {
+ if (emit->tcs.control_point_input_index == INVALID_INDEX) {
+
+ /* Add input control point declaration if it does not exist */
+ if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
+ emit->linkage.input_map[emit->linkage.num_inputs] =
+ emit->linkage.position_index;
+ emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
+
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT,
+ VGPU10_OPERAND_INDEX_2D,
+ emit->linkage.position_index,
+ emit->key.tcs.vertices_per_patch,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
+ }
+ }
+
+ /* Also add an address register for the indirection to the
+ * input control points
+ */
+ emit->tcs.control_point_addr_index = emit->num_address_regs++;
}
- else {
- assert(emit->unit == PIPE_SHADER_VERTEX);
+}
- for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
- unsigned usage_mask = emit->info.input_usage_mask[i];
- unsigned index = i;
- if (usage_mask == 0)
- continue; /* register is not actually used */
+static void
+emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
- emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
- VGPU10_OPERAND_TYPE_INPUT,
- VGPU10_OPERAND_INDEX_1D, index, 1,
- VGPU10_NAME_UNDEFINED,
- VGPU10_OPERAND_4_COMPONENT,
- VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
- VGPU10_INTERPOLATION_UNDEFINED);
+ /* In tcs, tess factors are emitted as extra outputs.
+ * The starting register index for the tess factors is captured
+ * in the compile key.
+ */
+ unsigned inputIndex = emit->key.tes.tessfactor_index;
+
+ if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
+ if (emit->key.tes.need_tessouter) {
+ emit->tes.outer.in_index = inputIndex;
+ for (int i = 0; i < 4; i++) {
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
+ }
+ }
+
+ if (emit->key.tes.need_tessinner) {
+ emit->tes.inner.in_index = inputIndex;
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
+
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
}
}
+ else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
+ if (emit->key.tes.need_tessouter) {
+ emit->tes.outer.in_index = inputIndex;
+ for (int i = 0; i < 3; i++) {
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
+ }
+ }
- return TRUE;
+ if (emit->key.tes.need_tessinner) {
+ emit->tes.inner.in_index = inputIndex;
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
+ }
+ }
+ else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
+ if (emit->key.tes.need_tessouter) {
+ emit->tes.outer.in_index = inputIndex;
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
+
+ emit_tesslevel_declaration(emit, inputIndex++,
+ VGPU10_OPCODE_DCL_INPUT_SIV,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
+ }
+ }
}
/**
- * Emit all output declarations.
+ * Emit input declarations for tessellation evaluation shader.
*/
-static boolean
-emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+static void
+emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
{
unsigned i;
- for (i = 0; i < emit->info.num_outputs; i++) {
- /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ unsigned usage_mask = emit->info.input_usage_mask[i];
+ unsigned index = emit->linkage.input_map[i];
+ unsigned size;
const enum tgsi_semantic semantic_name =
- emit->info.output_semantic_name[i];
- const unsigned semantic_index = emit->info.output_semantic_index[i];
- unsigned index = i;
+ emit->info.input_semantic_name[i];
+ SVGA3dDXSignatureSemanticName sgn_name;
+ VGPU10_OPERAND_TYPE operandType;
+ VGPU10_OPERAND_INDEX_DIMENSION dim;
+
+ if (usage_mask == 0)
+ usage_mask = 1; /* at least set usage mask to one */
+
+ if (semantic_name == TGSI_SEMANTIC_PATCH) {
+ operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
+ dim = VGPU10_OPERAND_INDEX_1D;
+ size = 1;
+ sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
+ }
+ else {
+ operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
+ dim = VGPU10_OPERAND_INDEX_2D;
+ size = emit->key.tes.vertices_per_patch;
+ sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
+ }
+
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
+ dim, index, size, VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED,
+ TRUE, sgn_name);
+ }
- if (emit->unit == PIPE_SHADER_FRAGMENT) {
- if (semantic_name == TGSI_SEMANTIC_COLOR) {
- assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
+ emit_tessfactor_input_declarations(emit);
+}
- emit->fs.color_out_index[semantic_index] = index;
- emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
- index + 1);
+/**
+ * Emit all input declarations.
+ */
+static boolean
+emit_input_declarations(struct svga_shader_emitter_v10 *emit)
+{
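+   /* An index range declaration is needed when inputs are indirectly addressed. */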
+ emit->index_range.required =
+ emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
- /* The semantic index is the shader's color output/buffer index */
- emit_output_declaration(emit,
- VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
- VGPU10_NAME_UNDEFINED,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+ switch (emit->unit) {
+ case PIPE_SHADER_FRAGMENT:
+ emit_fs_input_declarations(emit);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ emit_gs_input_declarations(emit);
+ break;
+ case PIPE_SHADER_VERTEX:
+ emit_vs_input_declarations(emit);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ emit_tcs_input_declarations(emit);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ emit_tes_input_declarations(emit);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ //XXX emit_cs_input_declarations(emit);
+ break;
+ default:
+ assert(0);
+ }
- if (semantic_index == 0) {
- if (emit->key.fs.write_color0_to_n_cbufs > 1) {
- /* Emit declarations for the additional color outputs
- * for broadcasting.
- */
- unsigned j;
- for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
- /* Allocate a new output index */
- unsigned idx = emit->info.num_outputs + j - 1;
- emit->fs.color_out_index[j] = idx;
- emit_output_declaration(emit,
- VGPU10_OPCODE_DCL_OUTPUT, idx,
- VGPU10_NAME_UNDEFINED,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
- emit->info.output_semantic_index[idx] = j;
- }
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+ emit->index_range.required = FALSE;
+ return TRUE;
+}
- emit->fs.num_color_outputs =
- emit->key.fs.write_color0_to_n_cbufs;
- }
- }
- else {
- assert(!emit->key.fs.write_color0_to_n_cbufs);
- }
- }
- else if (semantic_name == TGSI_SEMANTIC_POSITION) {
- /* Fragment depth output */
- emit_fragdepth_output_declaration(emit);
- }
- else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
- /* Fragment depth output */
- emit_samplemask_output_declaration(emit);
- }
- else {
- assert(!"Bad output semantic name");
- }
- }
- else {
- /* VS or GS */
- VGPU10_COMPONENT_NAME name;
- VGPU10_OPCODE_TYPE type;
- unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
-
- switch (semantic_name) {
- case TGSI_SEMANTIC_POSITION:
- assert(emit->unit != PIPE_SHADER_FRAGMENT);
- type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
- name = VGPU10_NAME_POSITION;
- /* Save the index of the vertex position output register */
- emit->vposition.out_index = index;
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
- name = VGPU10_NAME_CLIP_DISTANCE;
- /* save the starting index of the clip distance output register */
- if (semantic_index == 0)
- emit->clip_dist_out_index = index;
- writemask = emit->output_usage_mask[index];
- writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
- if (writemask == 0x0) {
- continue; /* discard this do-nothing declaration */
- }
- break;
- case TGSI_SEMANTIC_PRIMID:
- assert(emit->unit == PIPE_SHADER_GEOMETRY);
- type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
- name = VGPU10_NAME_PRIMITIVE_ID;
- break;
- case TGSI_SEMANTIC_LAYER:
- assert(emit->unit == PIPE_SHADER_GEOMETRY);
- type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
- name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- type = VGPU10_OPCODE_DCL_OUTPUT;
- name = VGPU10_NAME_UNDEFINED;
- emit->clip_vertex_out_index = index;
- break;
- default:
- /* generic output */
- type = VGPU10_OPCODE_DCL_OUTPUT;
- name = VGPU10_NAME_UNDEFINED;
- }
- emit_output_declaration(emit, type, index, name, writemask);
- }
+/**
+ * Emit all output declarations.
+ */
+static boolean
+emit_output_declarations(struct svga_shader_emitter_v10 *emit)
+{
+ emit->index_range.required =
+ emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
+
+ switch (emit->unit) {
+ case PIPE_SHADER_FRAGMENT:
+ emit_fs_output_declarations(emit);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ emit_gs_output_declarations(emit);
+ break;
+ case PIPE_SHADER_VERTEX:
+ emit_vs_output_declarations(emit);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ emit_tcs_output_declarations(emit);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ emit_tes_output_declarations(emit);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ //XXX emit_cs_output_declarations(emit);
+ break;
+ default:
+ assert(0);
}
if (emit->vposition.so_index != INVALID_INDEX &&
@@ -2796,7 +5069,9 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
emit->vposition.so_index,
VGPU10_NAME_UNDEFINED,
- VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
}
if (emit->clip_dist_so_index != INVALID_INDEX &&
@@ -2811,22 +5086,56 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
emit->clip_dist_so_index,
VGPU10_NAME_UNDEFINED,
- emit->output_usage_mask[emit->clip_dist_out_index]);
+ emit->output_usage_mask[emit->clip_dist_out_index],
+ TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
if (emit->info.num_written_clipdistance > 4) {
/* for the second clip distance register, each handles 4 planes */
emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
emit->clip_dist_so_index + 1,
VGPU10_NAME_UNDEFINED,
- emit->output_usage_mask[emit->clip_dist_out_index+1]);
+ emit->output_usage_mask[emit->clip_dist_out_index+1],
+ TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
}
}
+ if (emit->index_range.start_index != INVALID_INDEX) {
+ emit_index_range_declaration(emit);
+ }
+ emit->index_range.required = FALSE;
return TRUE;
}
/**
+ * A helper function to create a temporary indexable array
+ * and initialize the corresponding entries in the temp_map array.
+ */
+static void
+create_temp_array(struct svga_shader_emitter_v10 *emit,
+ unsigned arrayID, unsigned first, unsigned count,
+ unsigned startIndex)
+{
+ unsigned i, tempIndex = startIndex;
+
+ emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
+ assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
+ emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
+
+ emit->temp_arrays[arrayID].start = first;
+ emit->temp_arrays[arrayID].size = count;
+
+ /* Fill in the temp_map entries for this temp array */
+ for (i = 0; i < count; i++, tempIndex++) {
+ emit->temp_map[tempIndex].arrayId = arrayID;
+ emit->temp_map[tempIndex].index = i;
+ }
+}
+
+
+/**
* Emit the declaration for the temporary registers.
*/
static boolean
@@ -2844,18 +5153,7 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
*/
if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
emit->num_temp_arrays == 0) {
- unsigned arrayID;
-
- arrayID = 1;
- emit->num_temp_arrays = arrayID + 1;
- emit->temp_arrays[arrayID].start = 0;
- emit->temp_arrays[arrayID].size = total_temps;
-
- /* Fill in the temp_map entries for this temp array */
- for (i = 0; i < total_temps; i++) {
- emit->temp_map[i].arrayId = arrayID;
- emit->temp_map[i].index = i;
- }
+ create_temp_array(emit, 1, 0, total_temps, 0);
}
/* Allocate extra temps for specially-implemented instructions,
@@ -2863,6 +5161,29 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
*/
total_temps += MAX_INTERNAL_TEMPS;
+ /* Allocate extra temps for clip distance or clip vertex.
+ */
+ if (emit->clip_mode == CLIP_DISTANCE) {
+ /* We need to write the clip distance to a temporary register
+ * first. Then it will be copied to the shadow copy for
+ * the clip distance varying variable and stream output purpose.
+ * It will also be copied to the actual CLIPDIST register
+ * according to the enabled clip planes
+ */
+ emit->clip_dist_tmp_index = total_temps++;
+ if (emit->info.num_written_clipdistance > 4)
+ total_temps++; /* second clip register */
+ }
+ else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
+ /* If the current shader is in the last vertex processing stage,
+       * we need to convert the TGSI CLIPVERTEX output to one or more
+ * clip distances. Allocate a temp reg for the clipvertex here.
+ */
+ assert(emit->info.writes_clipvertex > 0);
+ emit->clip_vertex_tmp_index = total_temps;
+ total_temps++;
+ }
+
if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
emit->key.clip_plane_enable ||
@@ -2871,6 +5192,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
total_temps += 1;
}
+ if (emit->vposition.need_prescale) {
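+      /* temp registers to hold the prescale scale and translation factors */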
+ emit->vposition.prescale_scale_index = total_temps++;
+ emit->vposition.prescale_trans_index = total_temps++;
+ }
+
if (emit->unit == PIPE_SHADER_VERTEX) {
unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
emit->key.vs.adjust_attrib_itof |
@@ -2884,25 +5210,9 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
emit->vs.adjusted_input[index] = total_temps++;
}
}
-
- if (emit->clip_mode == CLIP_DISTANCE) {
- /* We need to write the clip distance to a temporary register
- * first. Then it will be copied to the shadow copy for
- * the clip distance varying variable and stream output purpose.
- * It will also be copied to the actual CLIPDIST register
- * according to the enabled clip planes
- */
- emit->clip_dist_tmp_index = total_temps++;
- if (emit->info.num_written_clipdistance > 4)
- total_temps++; /* second clip register */
- }
- else if (emit->clip_mode == CLIP_VERTEX) {
- /* We need to convert the TGSI CLIPVERTEX output to one or more
- * clip distances. Allocate a temp reg for the clipvertex here.
- */
- assert(emit->info.writes_clipvertex > 0);
- emit->clip_vertex_tmp_index = total_temps;
- total_temps++;
+ else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+ if (emit->key.gs.writes_viewport_index)
+ emit->gs.viewport_index_tmp_index = total_temps++;
}
}
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
@@ -2930,6 +5240,63 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
emit->fs.sample_pos_tmp_index = total_temps++;
}
}
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ if (emit->vposition.need_prescale) {
+ emit->vposition.tmp_index = total_temps++;
+ emit->vposition.prescale_scale_index = total_temps++;
+ emit->vposition.prescale_trans_index = total_temps++;
+ }
+
+ if (emit->tes.inner.tgsi_index) {
+ emit->tes.inner.temp_index = total_temps;
+ total_temps += 1;
+ }
+
+ if (emit->tes.outer.tgsi_index) {
+ emit->tes.outer.temp_index = total_temps;
+ total_temps += 1;
+ }
+ }
+ else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
+ if (!emit->tcs.control_point_phase) {
+ emit->tcs.inner.temp_index = total_temps;
+ total_temps += 1;
+ }
+ }
+ if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
+ if (!emit->tcs.control_point_phase) {
+ emit->tcs.outer.temp_index = total_temps;
+ total_temps += 1;
+ }
+ }
+
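+      /* temps for the control point outputs that are read back */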
+ if (emit->tcs.control_point_phase &&
+ emit->info.reads_pervertex_outputs) {
+ emit->tcs.control_point_tmp_index = total_temps;
+ total_temps += emit->tcs.control_point_out_count;
+ }
+ else if (!emit->tcs.control_point_phase &&
+ emit->info.reads_perpatch_outputs) {
+
+ /* If there is indirect access to the patch constant outputs
+ * in the control point phase, then an indexable temporary array
+ * will be created for these patch constant outputs.
+ * Note, indirect access can only be applicable to
+ * patch constant outputs in the control point phase.
+ */
+ if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ unsigned arrayID =
+ emit->num_temp_arrays ? emit->num_temp_arrays : 1;
+ create_temp_array(emit, arrayID, 0,
+ emit->tcs.patch_generic_out_count, total_temps);
+ }
+ emit->tcs.patch_generic_tmp_index = total_temps;
+ total_temps += emit->tcs.patch_generic_out_count;
+ }
+
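+      /* temp register for the invocation id */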
+ emit->tcs.invocation_id_tmp_index = total_temps++;
+ }
for (i = 0; i < emit->num_address_regs; i++) {
emit->address_reg_index[i] = total_temps++;
@@ -3065,8 +5432,8 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
/* Vertex position scale/translation */
if (emit->vposition.need_prescale) {
- emit->vposition.prescale_scale_index = total_consts++;
- emit->vposition.prescale_trans_index = total_consts++;
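+      /* two constant slots (scale and translation) are used per prescale set */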
+ emit->vposition.prescale_cbuf_index = total_consts;
+ total_consts += (2 * emit->vposition.num_prescale);
}
if (emit->unit == PIPE_SHADER_VERTEX) {
@@ -3078,8 +5445,8 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
/* user-defined clip planes */
if (emit->key.clip_plane_enable) {
unsigned n = util_bitcount(emit->key.clip_plane_enable);
- assert(emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY);
+ assert(emit->unit != PIPE_SHADER_FRAGMENT &&
+ emit->unit != PIPE_SHADER_COMPUTE);
for (i = 0; i < n; i++) {
emit->clip_plane_const[i] = total_consts++;
}
@@ -3309,34 +5676,48 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
return TRUE;
}
+/**
+ * Emit instruction with n=1, 2 or 3 source registers.
+ */
static void
-emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
- VGPU10_OPCODE_TYPE opcode,
+emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
+ unsigned opcode,
const struct tgsi_full_dst_register *dst,
- const struct tgsi_full_src_register *src,
- boolean saturate)
+ const struct tgsi_full_src_register *src1,
+ const struct tgsi_full_src_register *src2,
+ const struct tgsi_full_src_register *src3,
+ boolean saturate, bool precise)
{
begin_emit_instruction(emit);
- emit_opcode(emit, opcode, saturate);
+ emit_opcode_precise(emit, opcode, saturate, precise);
emit_dst_register(emit, dst);
- emit_src_register(emit, src);
+ emit_src_register(emit, src1);
+ if (src2) {
+ emit_src_register(emit, src2);
+ }
+ if (src3) {
+ emit_src_register(emit, src3);
+ }
end_emit_instruction(emit);
}
static void
+emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
+ unsigned opcode,
+ const struct tgsi_full_dst_register *dst,
+ const struct tgsi_full_src_register *src)
+{
+ emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
+}
+
+static void
emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
VGPU10_OPCODE_TYPE opcode,
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src1,
- const struct tgsi_full_src_register *src2,
- boolean saturate)
+ const struct tgsi_full_src_register *src2)
{
- begin_emit_instruction(emit);
- emit_opcode(emit, opcode, saturate);
- emit_dst_register(emit, dst);
- emit_src_register(emit, src1);
- emit_src_register(emit, src2);
- end_emit_instruction(emit);
+ emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
}
static void
@@ -3345,19 +5726,115 @@ emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src1,
const struct tgsi_full_src_register *src2,
- const struct tgsi_full_src_register *src3,
- boolean saturate)
+ const struct tgsi_full_src_register *src3)
+{
+ emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
+}
+
+static void
+emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
+ VGPU10_OPCODE_TYPE opcode)
{
begin_emit_instruction(emit);
- emit_opcode(emit, opcode, saturate);
- emit_dst_register(emit, dst);
- emit_src_register(emit, src1);
- emit_src_register(emit, src2);
- emit_src_register(emit, src3);
+ emit_opcode(emit, opcode, FALSE);
end_emit_instruction(emit);
}
/**
+ * Tessellation inner/outer levels need to be stored into their
+ * appropriate registers depending on prim_mode.
+ */
+static void
+store_tesslevels(struct svga_shader_emitter_v10 *emit)
+{
+ int i;
+
+ /* Tessellation levels are required hull shader outputs.
+ * Emit the inner/outer tessellation levels, either from the values
+ * provided in the TCS or from the fallback default value of 1.0.
+ */
+ if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
+ struct tgsi_full_src_register temp_src;
+
+ if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
+ temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
+ else
+ temp_src = make_immediate_reg_float(emit, 1.0f);
+
+ for (i = 0; i < 2; i++) {
+ struct tgsi_full_src_register src =
+ scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
+ struct tgsi_full_dst_register dst =
+ make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ }
+
+ if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
+ temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
+ else
+ temp_src = make_immediate_reg_float(emit, 1.0f);
+
+ for (i = 0; i < 4; i++) {
+ struct tgsi_full_src_register src =
+ scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
+ struct tgsi_full_dst_register dst =
+ make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ }
+ }
+ else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
+ struct tgsi_full_src_register temp_src;
+
+ if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
+ temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
+ else
+ temp_src = make_immediate_reg_float(emit, 1.0f);
+
+ struct tgsi_full_src_register src =
+ scalar_src(&temp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register dst =
+ make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+
+ if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
+ temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
+ else
+ temp_src = make_immediate_reg_float(emit, 1.0f);
+
+ for (i = 0; i < 3; i++) {
+ struct tgsi_full_src_register src =
+ scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
+ struct tgsi_full_dst_register dst =
+ make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ }
+ }
+ else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
+ if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
+ struct tgsi_full_src_register temp_src =
+ make_src_temp_reg(emit->tcs.outer.temp_index);
+ for (i = 0; i < 2; i++) {
+ struct tgsi_full_src_register src =
+ scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
+ struct tgsi_full_dst_register dst =
+ make_dst_reg(TGSI_FILE_OUTPUT,
+ emit->tcs.outer.out_index + i);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ }
+ }
+ }
+ else {
+ debug_printf("Unsupported primitive type");
+ }
+}
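A side-by-side view of the per-domain factor counts written by the MOV loops above may help; this is a minimal C sketch, not part of the patch, with an illustrative helper name.

   /* Illustrative only: outer/inner tess factor counts per domain,
    * matching the loops in store_tesslevels() above. */
   static void
   tess_factor_counts(enum pipe_prim_type prim, unsigned *outer, unsigned *inner)
   {
      switch (prim) {
      case PIPE_PRIM_QUADS:     *outer = 4; *inner = 2; break;
      case PIPE_PRIM_TRIANGLES: *outer = 3; *inner = 1; break;
      case PIPE_PRIM_LINES:     *outer = 2; *inner = 0; break;  /* isolines */
      default:                  *outer = 0; *inner = 0; break;
      }
   }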
+
+
+/**
* Emit the actual clip distance instructions to be used for clipping
* by copying the clip distance from the temporary registers to the
* CLIPDIST registers written with the enabled planes mask.
@@ -3399,7 +5876,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
/* MOV clip_dist_so, tmp_clip_dist */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
- &tmp_clip_dist_src, FALSE);
+ &tmp_clip_dist_src);
/**
* copy those clip distances to enabled clipping planes
@@ -3412,7 +5889,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
/* MOV CLIPDIST, tmp_clip_dist */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
- &tmp_clip_dist_src, FALSE);
+ &tmp_clip_dist_src);
}
/* four clip planes per clip register */
clip_plane_enable >>= 4;
@@ -3434,8 +5911,7 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
unsigned index = emit->num_outputs;
unsigned plane_mask;
- assert(emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY);
+ assert(emit->unit != PIPE_SHADER_FRAGMENT);
assert(num_clip_planes <= 8);
if (emit->clip_mode != CLIP_LEGACY &&
@@ -3446,6 +5922,10 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
if (num_clip_planes == 0)
return;
+ /* Convert clip vertex to clip distances only in the last vertex stage */
+ if (!emit->key.last_vertex_stage)
+ return;
+
/* Declare one or two clip output registers. The number of components
* in the mask reflects the number of clip planes. For example, if 5
* clip planes are needed, we'll declare outputs similar to:
@@ -3458,13 +5938,15 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
if (plane_mask & 0xf) {
unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
- VGPU10_NAME_CLIP_DISTANCE, cmask);
+ VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
emit->num_outputs++;
}
if (plane_mask & 0xf0) {
unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
- VGPU10_NAME_CLIP_DISTANCE, cmask);
+ VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
+ SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
emit->num_outputs++;
}
}
@@ -3488,7 +5970,8 @@ emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
assert(num_clip_planes <= 8);
assert(emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY);
+ emit->unit == PIPE_SHADER_GEOMETRY ||
+ emit->unit == PIPE_SHADER_TESS_EVAL);
for (i = 0; i < num_clip_planes; i++) {
struct tgsi_full_dst_register dst;
@@ -3506,7 +5989,7 @@ emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
/* DP4 clip_dist, plane, vpos */
emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
- &plane_src, &vpos_src, FALSE);
+ &plane_src, &vpos_src);
}
}
@@ -3527,7 +6010,8 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
assert(emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY);
+ emit->unit == PIPE_SHADER_GEOMETRY ||
+ emit->unit == PIPE_SHADER_TESS_EVAL);
assert(emit->clip_mode == CLIP_VERTEX);
@@ -3547,7 +6031,7 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
/* DP4 clip_dist, plane, vpos */
emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
- &plane_src, &clipvert_src, FALSE);
+ &plane_src, &clipvert_src);
}
/* copy temporary clip vertex register to the clip vertex register */
@@ -3564,7 +6048,7 @@ emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
/* MOV clip_vertex, clip_vertex_tmp */
dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &dst, &clipvert_src, FALSE);
+ &dst, &clipvert_src);
/**
* set the temporary clip vertex register index back to the
@@ -3613,20 +6097,18 @@ emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
/* val = src * 2.0 */
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
- src, &two, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
/* bias = src > 0.5 */
- emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
- src, &half, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
/* bias = bias & -2.0 */
emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
- &bias_src, &neg_two, FALSE);
+ &bias_src, &neg_two);
/* dst = val + bias */
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
- &val_src, &bias_src, FALSE);
+ &val_src, &bias_src);
free_temp_indexes(emit);
}
@@ -3642,7 +6124,7 @@ emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
/* dst = src * scale */
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
}
@@ -3671,10 +6153,10 @@ emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
* dst = i_to_f(r,g,b,a); # convert to float
*/
emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
- &src_xxxx, &lshift, FALSE);
+ &src_xxxx, &lshift);
emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
- &tmp_src, &rshift, FALSE);
- emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
+ &tmp_src, &rshift);
+ emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
free_temp_indexes(emit);
}
@@ -3693,6 +6175,7 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
assert(index < MAX_VGPU10_ADDR_REGS);
dst = make_dst_temp_reg(emit->address_reg_index[index]);
+ dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
/* ARL dst, s0
* Translates into:
@@ -3707,7 +6190,7 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
else
opcode = VGPU10_OPCODE_MOV;
- emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
+ emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
return TRUE;
}
@@ -3751,7 +6234,7 @@ emit_iabs(struct svga_shader_emitter_v10 *emit,
*/
struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
- &inst->Src[0], &neg_src, FALSE);
+ &inst->Src[0], &neg_src);
return TRUE;
}
@@ -3778,11 +6261,12 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
- emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
- &inst->Src[0], &zero, FALSE);
- emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
+ emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
+ &inst->Src[0], &zero, NULL, FALSE,
+ inst->Instruction.Precise);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
&tmp_src, &inst->Src[1], &inst->Src[2],
- inst->Instruction.Saturate);
+ inst->Instruction.Saturate, FALSE);
free_temp_indexes(emit);
@@ -3827,7 +6311,7 @@ emit_dst(struct svga_shader_emitter_v10 *emit,
writemask_dst(&move_dst, TGSI_WRITEMASK_X);
struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
}
/* MUL dst.y, s0.y, s1.y */
@@ -3835,8 +6319,9 @@ emit_dst(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_y =
writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
- &s1_yyyy, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
+ &s1_yyyy, NULL, inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
/* MOV dst.z, s0.z */
@@ -3844,8 +6329,10 @@ emit_dst(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_z =
writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &dst_z, &s0_zzzz, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
/* MOV dst.w, s1.w */
@@ -3853,18 +6340,30 @@ emit_dst(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_w =
writemask_dst(&move_dst, TGSI_WRITEMASK_W);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &dst_w, &s1_wwww, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
free_temp_indexes(emit);
return TRUE;
}
+/**
+ * A helper function to return the stream index as specified in
+ * the immediate register
+ */
+static inline unsigned
+find_stream_index(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_src_register *src)
+{
+ return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
+}
+
/**
* Emit code for TGSI_OPCODE_ENDPRIM (GS only)
@@ -3875,11 +6374,25 @@ emit_endprim(struct svga_shader_emitter_v10 *emit,
{
assert(emit->unit == PIPE_SHADER_GEOMETRY);
- /* We can't use emit_simple() because the TGSI instruction has one
- * operand (vertex stream number) which we must ignore for VGPU10.
- */
begin_emit_instruction(emit);
- emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
+ if (emit->version >= 50) {
+ unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
+
+ if (emit->info.num_stream_output_components[streamIndex] == 0) {
+ /**
+ * If there is no output for this stream, discard this instruction.
+ */
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
+ assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
+ emit_stream_register(emit, streamIndex);
+ }
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
+ }
end_emit_instruction(emit);
return TRUE;
}
@@ -3904,8 +6417,10 @@ emit_ex2(struct svga_shader_emitter_v10 *emit,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
/* EXP tmp, s0.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
+ NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
return TRUE;
}
@@ -3945,15 +6460,17 @@ emit_exp(struct svga_shader_emitter_v10 *emit,
/* ROUND_NI tmp.x, s0.x */
emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
- &src_xxxx, FALSE); /* round to -infinity */
+ &src_xxxx); /* round to -infinity */
/* EXP dst.x, tmp.x */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
struct tgsi_full_dst_register dst_x =
writemask_dst(&move_dst, TGSI_WRITEMASK_X);
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
+ NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
/* ADD dst.y, s0.x, -tmp */
@@ -3962,8 +6479,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit,
writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
- &neg_tmp_src, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
+ &neg_tmp_src, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
/* EXP dst.z, s0.x */
@@ -3971,8 +6490,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_z =
writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
+ NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
}
/* MOV dst.w, 1.0 */
@@ -3981,12 +6502,10 @@ emit_exp(struct svga_shader_emitter_v10 *emit,
writemask_dst(&move_dst, TGSI_WRITEMASK_W);
struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
}
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
free_temp_indexes(emit);
@@ -3999,14 +6518,14 @@ emit_exp(struct svga_shader_emitter_v10 *emit,
*/
static boolean
emit_if(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
+ const struct tgsi_full_src_register *src)
{
VGPU10OpcodeToken0 opcode0;
/* The src register should be a scalar */
- assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
- inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
- inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
+ assert(src->Register.SwizzleX == src->Register.SwizzleY &&
+ src->Register.SwizzleX == src->Register.SwizzleZ &&
+ src->Register.SwizzleX == src->Register.SwizzleW);
/* The only special thing here is that we need to set the
* VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
@@ -4018,7 +6537,7 @@ emit_if(struct svga_shader_emitter_v10 *emit,
begin_emit_instruction(emit);
emit_dword(emit, opcode0.value);
- emit_src_register(emit, &inst->Src[0]);
+ emit_src_register(emit, src);
end_emit_instruction(emit);
return TRUE;
@@ -4045,8 +6564,7 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit,
scalar_src(&tmp_src, TGSI_SWIZZLE_X);
/* tmp = src[0] < 0.0 */
- emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
- &zero, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
if (!same_swizzle_terms(&inst->Src[0])) {
/* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
@@ -4061,11 +6579,11 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit,
scalar_src(&tmp_src, TGSI_SWIZZLE_W);
emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
- &tmp_src_yyyy, FALSE);
+ &tmp_src_yyyy);
emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
- &tmp_src_zzzz, FALSE);
+ &tmp_src_zzzz);
emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
- &tmp_src_wwww, FALSE);
+ &tmp_src_wwww);
}
begin_emit_instruction(emit);
@@ -4117,8 +6635,10 @@ emit_lg2(struct svga_shader_emitter_v10 *emit,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
/* LOG tmp, s0.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
+ &inst->Dst[0], &src_xxxx, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
return TRUE;
}
@@ -4152,14 +6672,14 @@ emit_lit(struct svga_shader_emitter_v10 *emit,
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
struct tgsi_full_dst_register dst_x =
writemask_dst(&move_dst, TGSI_WRITEMASK_X);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
}
/* MOV dst.w, 1.0 */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
struct tgsi_full_dst_register dst_w =
writemask_dst(&move_dst, TGSI_WRITEMASK_W);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
}
/* MAX dst.y, src.x, 0.0 */
@@ -4172,8 +6692,8 @@ emit_lit(struct svga_shader_emitter_v10 *emit,
swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
- emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
- &zero, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
+ &zero, NULL, inst->Instruction.Saturate, FALSE);
}
/*
@@ -4223,42 +6743,37 @@ emit_lit(struct svga_shader_emitter_v10 *emit,
make_immediate_reg_float(emit, 128.0f);
emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
- &lowerbound, FALSE);
+ &lowerbound);
emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
- &upperbound, FALSE);
+ &upperbound);
emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
- &zero, FALSE);
+ &zero);
/* POW tmp1, tmp2, tmp1 */
/* LOG tmp2, tmp2 */
- emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
/* MUL tmp1, tmp2, tmp1 */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
- &tmp1_src, FALSE);
+ &tmp1_src);
/* EXP tmp1, tmp1 */
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
/* EQ tmp2, 0, src.w */
- emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
- &src_wwww, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
/* MOVC tmp1.z, tmp2, tmp1, 1.0 */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
- &tmp2_src, &one, &tmp1_src, FALSE);
+ &tmp2_src, &one, &tmp1_src);
/* LT tmp2, 0, src.x */
- emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
- &src_xxxx, FALSE);
+ emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
/* MOVC dst.z, tmp2, tmp1, 0.0 */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
- &tmp2_src, &tmp1_src, &zero, FALSE);
+ &tmp2_src, &tmp1_src, &zero);
}
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
free_temp_indexes(emit);
return TRUE;
@@ -4316,8 +6831,7 @@ emit_log(struct svga_shader_emitter_v10 *emit,
/* LOG tmp.x, abs(s0.x) */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
- emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
- &abs_src_xxxx, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
}
/* MOV dst.z, tmp.x */
@@ -4325,14 +6839,14 @@ emit_log(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_z =
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
- &tmp_src, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &dst_z, &tmp_src, NULL, NULL,
+ inst->Instruction.Saturate, FALSE);
}
/* FLR tmp.x, tmp.x */
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
- emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
- &tmp_src, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
}
/* MOV dst.x, tmp.x */
@@ -4340,8 +6854,9 @@ emit_log(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_x =
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
- inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &dst_x, &tmp_src, NULL, NULL,
+ inst->Instruction.Saturate, FALSE);
}
/* EXP tmp.x, tmp.x */
@@ -4350,10 +6865,9 @@ emit_log(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register dst_y =
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
- FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
- &tmp_src, inst->Instruction.Saturate);
+ emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
+ emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
+ &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
}
/* MOV dst.w, 1.0 */
@@ -4363,7 +6877,7 @@ emit_log(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register one =
make_immediate_reg_float(emit, 1.0f);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
}
free_temp_indexes(emit);
@@ -4391,13 +6905,15 @@ emit_lrp(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
/* ADD tmp, s1, -s2 */
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
- &inst->Src[1], &neg_src2, FALSE);
+ emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
+ &inst->Src[1], &neg_src2, NULL, FALSE,
+ inst->Instruction.Precise);
/* MAD dst, s1, tmp, s3 */
- emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
+ emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
&inst->Src[0], &src_tmp, &inst->Src[2],
- inst->Instruction.Saturate);
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
free_temp_indexes(emit);
@@ -4429,16 +6945,20 @@ emit_pow(struct svga_shader_emitter_v10 *emit,
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
/* LOG tmp, s0.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
- FALSE);
+ emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
+ &tmp_dst, &src0_xxxx, NULL, NULL,
+ FALSE, inst->Instruction.Precise);
/* MUL tmp, tmp, s1.xxxx */
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
- &src1_xxxx, FALSE);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
+ &tmp_dst, &tmp_src, &src1_xxxx, NULL,
+ FALSE, inst->Instruction.Precise);
/* EXP tmp, s0.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
- &tmp_src, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
+ &inst->Dst[0], &tmp_src, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
/* free tmp */
free_temp_indexes(emit);
@@ -4454,26 +6974,49 @@ static boolean
emit_rcp(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
- struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+ if (emit->version >= 50) {
+ /* Use the SM5 RCP instruction. But VGPU10_OPCODE_RCP is component-wise,
+ * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx, so we need
+ * to replicate the X component across the src register's swizzle.
+ */
+ struct tgsi_full_src_register src = inst->Src[0];
+ src.Register.SwizzleY =
+ src.Register.SwizzleZ =
+ src.Register.SwizzleW = src.Register.SwizzleX;
- unsigned tmp = get_temp_index(emit);
- struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
- struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_src_register(emit, &src);
+ end_emit_instruction(emit);
+ }
+ else {
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
- struct tgsi_full_dst_register tmp_dst_x =
- writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
- struct tgsi_full_src_register tmp_src_xxxx =
- scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
- /* DIV tmp.x, 1.0, s0 */
- emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
- &inst->Src[0], FALSE);
+ struct tgsi_full_dst_register tmp_dst_x =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+ struct tgsi_full_src_register tmp_src_xxxx =
+ scalar_src(&tmp_src, TGSI_SWIZZLE_X);
- /* MOV dst, tmp.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &tmp_src_xxxx, inst->Instruction.Saturate);
+ /* DIV tmp.x, 1.0, s0 */
+ emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
+ &tmp_dst_x, &one, &inst->Src[0], NULL,
+ FALSE, inst->Instruction.Precise);
- free_temp_indexes(emit);
+ /* MOV dst, tmp.xxxx */
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
+
+ free_temp_indexes(emit);
+ }
return TRUE;
}
@@ -4503,12 +7046,15 @@ emit_rsq(struct svga_shader_emitter_v10 *emit,
scalar_src(&tmp_src, TGSI_SWIZZLE_X);
/* RSQ tmp, src.x */
- emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
- &inst->Src[0], FALSE);
+ emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
+ &tmp_dst_x, &inst->Src[0], NULL, NULL,
+ FALSE, inst->Instruction.Precise);
/* MOV dst, tmp.xxxx */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &tmp_src_xxxx, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
/* free tmp */
free_temp_indexes(emit);
@@ -4538,11 +7084,11 @@ emit_seq(struct svga_shader_emitter_v10 *emit,
/* EQ tmp, s0, s1 */
emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
+ &inst->Src[1]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4571,11 +7117,11 @@ emit_sge(struct svga_shader_emitter_v10 *emit,
/* GE tmp, s0, s1 */
emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
+ &inst->Src[1]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4604,11 +7150,11 @@ emit_sgt(struct svga_shader_emitter_v10 *emit,
/* LT tmp, s1, s0 */
emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
- &inst->Src[0], FALSE);
+ &inst->Src[0]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4648,8 +7194,10 @@ emit_sincos(struct svga_shader_emitter_v10 *emit,
emit_src_register(emit, &inst->Src[0]);
end_emit_instruction(emit);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &tmp_src_xxxx, inst->Instruction.Saturate);
+ emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
+ &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
free_temp_indexes(emit);
@@ -4678,11 +7226,11 @@ emit_sle(struct svga_shader_emitter_v10 *emit,
/* GE tmp, s1, s0 */
emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
- &inst->Src[0], FALSE);
+ &inst->Src[0]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4711,11 +7259,11 @@ emit_slt(struct svga_shader_emitter_v10 *emit,
/* LT tmp, s0, s1 */
emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
+ &inst->Src[1]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4744,11 +7292,11 @@ emit_sne(struct svga_shader_emitter_v10 *emit,
/* NE tmp, s0, s1 */
emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
+ &inst->Src[1]);
/* MOVC dst, tmp, one, zero */
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
- &one, &zero, FALSE);
+ &one, &zero);
free_temp_indexes(emit);
@@ -4789,13 +7337,13 @@ emit_ssg(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
- &zero, FALSE);
+ &zero);
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
- &neg_one, &zero, FALSE);
+ &neg_one, &zero);
emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
- &inst->Src[0], FALSE);
+ &inst->Src[0]);
emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
- &one, &tmp2_src, FALSE);
+ &one, &tmp2_src);
free_temp_indexes(emit);
@@ -4832,11 +7380,11 @@ emit_issg(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
- &inst->Src[0], &zero, FALSE);
+ &inst->Src[0], &zero);
emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
- &zero, &inst->Src[0], FALSE);
+ &zero, &inst->Src[0]);
emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
- &tmp1_src, &neg_tmp2, FALSE);
+ &tmp1_src, &neg_tmp2);
free_temp_indexes(emit);
@@ -4982,15 +7530,15 @@ setup_texcoord(struct svga_shader_emitter_v10 *emit,
/* ADD tmp, coord, offset */
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
- coord, &offset, FALSE);
+ coord, &offset);
/* MUL tmp, tmp, scale */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
- &tmp_src, &scale_src, FALSE);
+ &tmp_src, &scale_src);
}
else {
/* MUL tmp, coord, const[] */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
- coord, &scale_src, FALSE);
+ coord, &scale_src);
}
return tmp_src;
}
@@ -5118,8 +7666,6 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
/* convert gallium comparison func to SVGA comparison func */
SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
- assert(emit->unit == PIPE_SHADER_FRAGMENT);
-
int component =
tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
assert(component >= 0);
@@ -5161,7 +7707,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
/* MOV dst, color(tmp).<swizzle> */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- swz->inst_dst, &src_swizzled, FALSE);
+ swz->inst_dst, &src_swizzled);
/* handle swizzle zero terms */
writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
@@ -5178,8 +7724,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
writemask_dst(swz->inst_dst, writemask_0);
/* MOV dst.writemask_0, {0,0,0,0} */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &dst, &zero, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
}
/* handle swizzle one terms */
@@ -5197,7 +7742,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
writemask_dst(swz->inst_dst, writemask_1);
/* MOV dst.writemask_1, {1,1,1,1} */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
}
}
}
@@ -5339,45 +7884,160 @@ emit_tg4(struct svga_shader_emitter_v10 *emit,
{
const uint unit = inst->Src[2].Register.Index;
struct tgsi_full_src_register src;
+ struct tgsi_full_src_register offset_src, sampler, ref;
int offsets[3];
/* check that the sampler returns a float */
if (!is_valid_tex_instruction(emit, inst))
return TRUE;
- /* Only a single channel is supported in SM4_1 and we report
- * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
- * Only the 0th component will be gathered.
- */
- switch (emit->key.tex[unit].swizzle_r) {
- case PIPE_SWIZZLE_X:
- get_texel_offsets(emit, inst, offsets);
+ if (emit->version >= 50) {
+ unsigned target = inst->Texture.Texture;
+ int index = inst->Src[1].Register.Index;
+ const union tgsi_immediate_data *imm = emit->immediates[index];
+ int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
+ unsigned select_swizzle = PIPE_SWIZZLE_X;
+
+ if (!tgsi_is_shadow_target(target)) {
+ switch (select_comp) {
+ case 0:
+ select_swizzle = emit->key.tex[unit].swizzle_r;
+ break;
+ case 1:
+ select_swizzle = emit->key.tex[unit].swizzle_g;
+ break;
+ case 2:
+ select_swizzle = emit->key.tex[unit].swizzle_b;
+ break;
+ case 3:
+ select_swizzle = emit->key.tex[unit].swizzle_a;
+ break;
+ default:
+ assert(!"Unexpected component in texture gather swizzle");
+ }
+ }
+ else {
+ select_swizzle = emit->key.tex[unit].swizzle_r;
+ }
+
+ if (select_swizzle == PIPE_SWIZZLE_1) {
+ src = make_immediate_reg_float(emit, 1.0);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
+ return TRUE;
+ }
+ else if (select_swizzle == PIPE_SWIZZLE_0) {
+ src = make_immediate_reg_float(emit, 0.0);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
+ return TRUE;
+ }
+
src = setup_texcoord(emit, unit, &inst->Src[0]);
- /* Gather dst, coord, resource, sampler */
+ /* GATHER4 dst, coord, resource, sampler */
+ /* GATHER4_C dst, coord, resource, sampler ref */
+ /* GATHER4_PO dst, coord, offset resource, sampler */
+ /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
begin_emit_instruction(emit);
- emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
- inst->Instruction.Saturate, offsets);
+ if (inst->Texture.NumOffsets == 1) {
+ if (tgsi_is_shadow_target(target)) {
+ emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
+ inst->Instruction.Saturate);
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
+ inst->Instruction.Saturate);
+ }
+ }
+ else {
+ if (tgsi_is_shadow_target(target)) {
+ emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
+ inst->Instruction.Saturate);
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_GATHER4,
+ inst->Instruction.Saturate);
+ }
+ }
+
emit_dst_register(emit, &inst->Dst[0]);
emit_src_register(emit, &src);
+ if (inst->Texture.NumOffsets == 1) {
+ /* offset */
+ offset_src = make_src_reg(inst->TexOffsets[0].File,
+ inst->TexOffsets[0].Index);
+ offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
+ inst->TexOffsets[0].SwizzleY,
+ inst->TexOffsets[0].SwizzleZ,
+ TGSI_SWIZZLE_W);
+ emit_src_register(emit, &offset_src);
+ }
+
+ /* resource */
emit_resource_register(emit, unit);
- emit_sampler_register(emit, unit);
+
+ /* sampler */
+ sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
+ sampler.Register.SwizzleX =
+ sampler.Register.SwizzleY =
+ sampler.Register.SwizzleZ =
+ sampler.Register.SwizzleW = select_swizzle;
+ emit_src_register(emit, &sampler);
+
+ if (tgsi_is_shadow_target(target)) {
+ /* ref */
+ if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+ ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+ emit_tex_compare_refcoord(emit, target, &ref);
+ }
+ else {
+ emit_tex_compare_refcoord(emit, target, &src);
+ }
+ }
+
end_emit_instruction(emit);
- break;
- case PIPE_SWIZZLE_W:
- case PIPE_SWIZZLE_1:
- src = make_immediate_reg_float(emit, 1.0);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &inst->Dst[0], &src, FALSE);
- break;
- case PIPE_SWIZZLE_Y:
- case PIPE_SWIZZLE_Z:
- case PIPE_SWIZZLE_0:
- default:
- src = make_immediate_reg_float(emit, 0.0);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &inst->Dst[0], &src, FALSE);
- break;
+ free_temp_indexes(emit);
+ }
+ else {
+ /* Only a single channel is supported in SM4_1 and we report
+ * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
+ * Only the 0th component will be gathered.
+ */
+ switch (emit->key.tex[unit].swizzle_r) {
+ case PIPE_SWIZZLE_X:
+ get_texel_offsets(emit, inst, offsets);
+ src = setup_texcoord(emit, unit, &inst->Src[0]);
+
+ /* Gather dst, coord, resource, sampler */
+ begin_emit_instruction(emit);
+ emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
+ inst->Instruction.Saturate, offsets);
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_src_register(emit, &src);
+ emit_resource_register(emit, unit);
+
+ /* sampler */
+ sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
+ sampler.Register.SwizzleX =
+ sampler.Register.SwizzleY =
+ sampler.Register.SwizzleZ =
+ sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
+ emit_src_register(emit, &sampler);
+
+ end_emit_instruction(emit);
+ break;
+ case PIPE_SWIZZLE_W:
+ case PIPE_SWIZZLE_1:
+ src = make_immediate_reg_float(emit, 1.0);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
+ break;
+ case PIPE_SWIZZLE_Y:
+ case PIPE_SWIZZLE_Z:
+ case PIPE_SWIZZLE_0:
+ default:
+ src = make_immediate_reg_float(emit, 0.0);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
+ break;
+ }
}
return TRUE;
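The four-way opcode choice in the SM5 path above follows from whether a texel offset operand is present and whether the target is a shadow sampler. A compact sketch of the same decision, not part of the patch and with an illustrative helper name:

   /* Illustrative only: SM5 gather opcode selection used in emit_tg4(). */
   static VGPU10_OPCODE_TYPE
   pick_gather_opcode(boolean has_offset, boolean is_shadow)
   {
      if (has_offset)
         return is_shadow ? VGPU10_OPCODE_GATHER4_PO_C : VGPU10_OPCODE_GATHER4_PO;
      else
         return is_shadow ? VGPU10_OPCODE_GATHER4_C : VGPU10_OPCODE_GATHER4;
   }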
@@ -5459,7 +8119,7 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
/* DIV tmp, coord, coord.wwww */
emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
- &coord, &src0_wwww, FALSE);
+ &coord, &src0_wwww);
/* SAMPLE dst, coord(tmp), resource, sampler */
begin_emit_instruction(emit);
@@ -5703,8 +8363,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit,
*/
struct tgsi_full_src_register size_src =
make_src_const_reg(emit->texture_buffer_size_index[unit]);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
- FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
} else {
/* RESINFO dst, srcMipLevel, resource */
begin_emit_instruction(emit);
@@ -5722,6 +8381,319 @@ emit_txq(struct svga_shader_emitter_v10 *emit,
/**
+ * Does this opcode produce a double-precision result?
+ * XXX perhaps move this to a TGSI utility.
+ */
+static bool
+opcode_has_dbl_dst(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DFRAC:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ // XXX more TBD
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/**
+ * Does this opcode use double-precision source registers?
+ */
+static bool
+opcode_has_dbl_src(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DFRAC:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ // XXX more TBD
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/**
+ * Check that the swizzle for reading from a double-precision register
+ * is valid.
+ */
+static void
+check_double_src_swizzle(const struct tgsi_full_src_register *reg)
+{
+ assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
+ reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
+ (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
+ reg->Register.SwizzleY == PIPE_SWIZZLE_W));
+
+ assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
+ reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
+ (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
+ reg->Register.SwizzleW == PIPE_SWIZZLE_W));
+}
+
+
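The swizzle restriction checked here comes from how doubles are packed into 32-bit register components; a minimal illustration, not part of the patch:

   /* Illustrative only: a double spans two 32-bit components, so a vec4
    * register holds at most two doubles: d0 in .xy and d1 in .zw.  Legal
    * double swizzles therefore select whole pairs (xy or zw) per operand,
    * and legal double writemasks are .xy, .zw or .xyzw. */
   union double_reg_pair {
      double   d;        /* one double value                */
      unsigned u32[2];   /* the two components it occupies  */
   };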
+/**
+ * Check that the writemask for a double-precision instruction is valid.
+ */
+static void
+check_double_dst_writemask(const struct tgsi_full_instruction *inst)
+{
+ ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DFRAC:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_DMAD:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DRCP:
+ case TGSI_OPCODE_DSQRT:
+ case TGSI_OPCODE_F2D:
+ assert(writemask == TGSI_WRITEMASK_XYZW ||
+ writemask == TGSI_WRITEMASK_XY ||
+ writemask == TGSI_WRITEMASK_ZW);
+ break;
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ /* Write to 1 or 2 components only */
+ assert(util_bitcount(writemask) <= 2);
+ break;
+ default:
+ /* XXX this list may be incomplete */
+ ;
+ }
+}
+
+
+/**
+ * Double-precision absolute value.
+ */
+static boolean
+emit_dabs(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->version >= 50);
+ check_double_src_swizzle(&inst->Src[0]);
+ check_double_dst_writemask(inst);
+
+ struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
+
+ /* DMOV dst, |src| */
+ emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
+
+ return TRUE;
+}
+
+
+/**
+ * Double-precision negation
+ */
+static boolean
+emit_dneg(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->version >= 50);
+ check_double_src_swizzle(&inst->Src[0]);
+ check_double_dst_writemask(inst);
+
+ struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
+
+ /* DMOV dst, -src */
+ emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
+
+ return TRUE;
+}
+
+
+/**
+ * SM5 has no DMAD opcode. Implement multiply-add with DMUL/DADD.
+ */
+static boolean
+emit_dmad(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->version >= 50);
+ check_double_src_swizzle(&inst->Src[0]);
+ check_double_src_swizzle(&inst->Src[1]);
+ check_double_src_swizzle(&inst->Src[2]);
+ check_double_dst_writemask(inst);
+
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+ /* DMUL tmp, src[0], src[1] */
+ emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
+ &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
+ FALSE, inst->Instruction.Precise);
+
+ /* DADD dst, tmp, src[2] */
+ emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
+ &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
+ inst->Instruction.Saturate, inst->Instruction.Precise);
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
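In scalar terms the lowering above is simply an unfused multiply-add. A minimal sketch, not from the driver; note the two separate roundings may differ from a true fused MAD:

   /* Illustrative only: DMAD lowered to DMUL + DADD (two roundings). */
   static double
   dmad_ref(double a, double b, double c)
   {
      double t = a * b;   /* DMUL tmp, src0, src1 */
      return t + c;       /* DADD dst, tmp, src2  */
   }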
+/**
+ * Double precision reciprocal square root
+ */
+static boolean
+emit_drsq(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_dst_register *dst,
+ const struct tgsi_full_src_register *src)
+{
+ assert(emit->version >= 50);
+
+ VGPU10OpcodeToken0 token0;
+ begin_emit_instruction(emit);
+
+ token0.value = 0;
+ token0.opcodeType = VGPU10_OPCODE_VMWARE;
+ token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
+ emit_dword(emit, token0.value);
+
+ emit_dst_register(emit, dst);
+
+ check_double_src_swizzle(src);
+ emit_src_register(emit, src);
+
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
+ * There is no SM5 opcode for double precision square root.
+ * It will be implemented with DRSQ.
+ * dst = src * DRSQ(src)
+ */
+static boolean
+emit_dsqrt(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->version >= 50);
+
+ check_double_src_swizzle(&inst->Src[0]);
+
+ /* temporary register to hold the source */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+
+ /* temporary register to hold the DEQ result */
+ unsigned tmp_cond = get_temp_index(emit);
+ struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
+ struct tgsi_full_dst_register tmp_cond_dst_xy =
+ writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
+ struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
+ struct tgsi_full_src_register tmp_cond_src_xy =
+ swizzle_src(&tmp_cond_src,
+ PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
+
+ /* The reciprocal square root of zero yields INF.
+ * So if the source is 0, we replace it with 1 in the tmp register.
+ * The subsequent multiplication by the original (zero) source then
+ * yields 0 in the result.
+ */
+
+ /* tmp = (src == 0) ? 1 : src;
+ * DEQ tmp_cond, 0, src
+ * DMOVC tmp, tmp_cond, 1.0, src
+ */
+ struct tgsi_full_src_register zero =
+ make_immediate_reg_double(emit, 0);
+
+ struct tgsi_full_src_register one =
+ make_immediate_reg_double(emit, 1.0);
+
+ emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
+ &zero, &inst->Src[0]);
+ emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
+ &tmp_cond_src_xy, &one, &inst->Src[0]);
+
+ struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
+
+ /* DRSQ tmp_rsq, tmp */
+ emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
+
+ /* DMUL dst, tmp_rsq, src[0] */
+ emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
+ &tmp_rsq_src, &inst->Src[0]);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
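A scalar sketch, not from the driver, of the zero-guarded lowering above; replacing a zero source before DRSQ avoids the INF that would otherwise turn the final multiply into 0 * INF = NaN:

   #include <math.h>

   /* Illustrative only: dst = src * DRSQ(src), with src == 0 guarded. */
   static double
   dsqrt_ref(double x)
   {
      double guarded = (x == 0.0) ? 1.0 : x;   /* DEQ + DMOVC */
      double rsq = 1.0 / sqrt(guarded);        /* DRSQ        */
      return x * rsq;                          /* DMUL        */
   }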
+static boolean
+emit_interp_offset(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ assert(emit->version >= 50);
+
+ /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
+ * where (0,0) is the center of the pixel. We need to translate that
+ * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
+ * We also need to flip the Y axis (hence the -16.0 scale on Y below).
+ */
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+ struct tgsi_full_dst_register tmp_dst_xy =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
+ struct tgsi_full_src_register const16 =
+ make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
+
+ /* MUL tmp.xy, src1, {16, -16, 0, 0} */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
+ &tmp_dst_xy, &inst->Src[1], &const16);
+
+ /* FTOI tmp.xy, tmp */
+ emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
+
+ /* EVAL_SNAPPED dst, src0, tmp */
+ emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
+ &inst->Dst[0], &inst->Src[0], &tmp_src);
+
+ free_temp_indexes(emit);
+
+ return TRUE;
+}
+
+
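The offset snapping above reduced to one scalar in C, with an illustrative helper name, not part of the patch: each float offset in [-0.5, 0.5] is scaled by +/-16 and truncated toward zero by FTOI, giving a position on the 16x16 sub-pixel grid.

   /* Illustrative only: MUL by {16, -16} then FTOI, per component. */
   static int
   snap_interp_offset(float offset, boolean flip_y)
   {
      float scaled = offset * (flip_y ? -16.0f : 16.0f);  /* MUL  */
      return (int) scaled;                                /* FTOI */
   }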
+/**
* Emit a simple instruction (like ADD, MUL, MIN, etc).
*/
static boolean
@@ -5730,14 +8702,31 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
{
const enum tgsi_opcode opcode = inst->Instruction.Opcode;
const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+ const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
+ const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
unsigned i;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
+ emit->current_loop_depth++;
+ }
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
+ emit->current_loop_depth--;
+ }
+
begin_emit_instruction(emit);
- emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
+ emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
+ inst->Instruction.Saturate,
+ inst->Instruction.Precise);
for (i = 0; i < op->num_dst; i++) {
+ if (dbl_dst) {
+ check_double_dst_writemask(inst);
+ }
emit_dst_register(emit, &inst->Dst[i]);
}
for (i = 0; i < op->num_src; i++) {
+ if (dbl_src) {
+ check_double_src_swizzle(&inst->Src[i]);
+ }
emit_src_register(emit, &inst->Src[i]);
}
end_emit_instruction(emit);
@@ -5747,6 +8736,222 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
/**
+ * Emit MSB instruction (like IMSB, UMSB).
+ *
+ * GLSL returns the bit index counting from the LSB, whereas the SM5
+ * firstbit_hi/shi instructions return the index counting from the MSB.
+ * To get the GLSL-style result from the SM5 instruction we return
+ * (31 - index) whenever the returned index is not -1.
+ */
+static boolean
+emit_msb(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
+
+ assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
+
+ struct tgsi_full_src_register index_src =
+ make_src_reg(index_dst->Register.File, index_dst->Register.Index);
+ struct tgsi_full_src_register imm31 =
+ make_immediate_reg_int(emit, 31);
+ imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register neg_one =
+ make_immediate_reg_int(emit, -1);
+ neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
+ unsigned tmp = get_temp_index(emit);
+ const struct tgsi_full_dst_register tmp_dst =
+ make_dst_temp_reg(tmp);
+ const struct tgsi_full_dst_register tmp_dst_x =
+ writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
+ const struct tgsi_full_src_register tmp_src_x =
+ make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
+ int writemask = TGSI_WRITEMASK_X;
+ int src_swizzle = TGSI_SWIZZLE_X;
+ int dst_writemask = index_dst->Register.WriteMask;
+
+ emit_simple(emit, inst);
+
+ /* index conversion from SM5 to GLSL */
+ while (writemask & dst_writemask) {
+ struct tgsi_full_src_register index_src_comp =
+ scalar_src(&index_src, src_swizzle);
+ struct tgsi_full_dst_register index_dst_comp =
+ writemask_dst(index_dst, writemask);
+
+ /* check if index_src_comp != -1 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_INE,
+ &tmp_dst_x, &index_src_comp, &neg_one);
+
+ /* if */
+ emit_if(emit, &tmp_src_x);
+
+ index_src_comp = negate_src(&index_src_comp);
+ /* dst = 31 - index (IADD with the negated index) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
+ &index_dst_comp, &imm31, &index_src_comp);
+
+ /* endif */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
+
+ writemask = writemask << 1;
+ src_swizzle = src_swizzle + 1;
+ }
+ free_temp_indexes(emit);
+ return TRUE;
+}
+
+
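A worked example of the conversion above, as a sketch with an illustrative helper name, not part of the patch: for value 0x00000F00, SM5 firstbit_hi returns 20 (counting from the MSB) while GLSL findMSB() expects 11, and 31 - 20 == 11; an all-zero input returns -1 in both conventions, so the IF skips the subtraction.

   /* Illustrative only: SM5 firstbit_hi result converted to GLSL findMSB(). */
   static int
   sm5_to_glsl_msb(int sm5_index)
   {
      return (sm5_index == -1) ? -1 : 31 - sm5_index;
   }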
+/**
+ * Emit a BFE instruction (like UBFE, IBFE).
+ * tgsi representation:
+ * U/IBFE dst, value, offset, width
+ * SM5 representation:
+ * U/IBFE dst, width, offset, value
+ * Note: SM5 limits width & offset to the range 0-31,
+ * whereas GLSL allows the range 0-32.
+ */
+static boolean
+emit_bfe(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
+ struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
+ imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
+ struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
+ zero = scalar_src(&zero, TGSI_SWIZZLE_X);
+
+ unsigned tmp1 = get_temp_index(emit);
+ const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
+ const struct tgsi_full_dst_register cond1_dst_x =
+ writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
+ const struct tgsi_full_src_register cond1_src_x =
+ make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
+
+ unsigned tmp2 = get_temp_index(emit);
+ const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
+ const struct tgsi_full_dst_register cond2_dst_x =
+ writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
+ const struct tgsi_full_src_register cond2_src_x =
+ make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
+
+ /**
+ * In SM5, when width = 32 and offset = 0, the instruction returns 0.
+ * GLSL, on the other hand, expects the value to be copied unchanged to dst.
+ */
+
+ /* cond1 = (width == 32) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
+ &cond1_dst_x, &inst->Src[2], &imm32);
+
+ /* cond2 = (offset == 0) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
+ &cond2_dst_x, &inst->Src[1], &zero);
+
+ /* cond2 = cond1 & cond2 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
+ &cond2_src_x,
+ &cond1_src_x);
+ /* IF */
+ emit_if(emit, &cond2_src_x);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+ &inst->Src[0]);
+
+ /* ELSE */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
+
+ /* U/IBFE dst, width, offset, value */
+ emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
+ &inst->Src[2], &inst->Src[1], &inst->Src[0]);
+
+ /* ENDIF */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
+
+ free_temp_indexes(emit);
+ return TRUE;
+}
+
+
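A reference sketch, not from the driver, of the semantics the emitted code reproduces for the unsigned case, assuming 32-bit operands and offset + width <= 32 otherwise; the width == 32 / offset == 0 branch matches the IF above, and other width == 32 cases match what the SM5 instruction does with its 5-bit width field.

   /* Illustrative only: GLSL-style bitfieldExtract for unsigned values. */
   static unsigned
   ubfe_ref(unsigned value, unsigned offset, unsigned width)
   {
      if (width == 32)
         return (offset == 0) ? value : 0;   /* SM5 masks width to 5 bits */
      return (value >> offset) & ((1u << width) - 1u);
   }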
+/**
+ * Emit BFI instruction
+ * tgsi representation:
+ * BFI dst, base, insert, offset, width
+ * SM5 representation:
+ * BFI dst, width, offset, insert, base
+ * Note: SM5 limits width & offset to the range 0-31,
+ * whereas GLSL allows the range 0-32.
+ */
+static boolean
+emit_bfi(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst)
+{
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
+ struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
+ imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
+
+ struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
+ zero = scalar_src(&zero, TGSI_SWIZZLE_X);
+
+ unsigned tmp1 = get_temp_index(emit);
+ const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
+ const struct tgsi_full_dst_register cond1_dst_x =
+ writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
+ const struct tgsi_full_src_register cond1_src_x =
+ make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
+
+ unsigned tmp2 = get_temp_index(emit);
+ const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
+ const struct tgsi_full_dst_register cond2_dst_x =
+ writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
+ const struct tgsi_full_src_register cond2_src_x =
+ make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
+
+ /**
+ * In SM5, when width = 32 and offset = 0, the instruction returns 0.
+ * GLSL, on the other hand, expects the insert value to be copied unchanged to dst.
+ */
+
+ /* cond1 = width == 32 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
+ &cond1_dst_x, &inst->Src[3], &imm32);
+
+ /* cond2 = (offset == 0) */
+ emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
+ &cond2_dst_x, &inst->Src[2], &zero);
+
+ /* cond2 = cond1 & cond2 */
+ emit_instruction_op2(emit, VGPU10_OPCODE_AND,
+ &cond2_dst_x, &cond2_src_x, &cond1_src_x);
+
+ /* if */
+ emit_if(emit, &cond2_src_x);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
+ &inst->Src[1]);
+
+ /* else */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
+
+ /* BFI dst, width, offset, insert, base */
+ begin_emit_instruction(emit);
+ emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_src_register(emit, &inst->Src[3]);
+ emit_src_register(emit, &inst->Src[2]);
+ emit_src_register(emit, &inst->Src[1]);
+ emit_src_register(emit, &inst->Src[0]);
+ end_emit_instruction(emit);
+
+ /* endif */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
+
+ free_temp_indexes(emit);
+ return TRUE;
+}
+
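As the comment above notes, TGSI orders the BFI operands as (base, insert, offset, width) while SM5 expects (width, offset, insert, base), which is why the emitted instruction reads Src[3] down to Src[0]. A scalar model of the GLSL behavior being matched, as a sketch only (the helper name is hypothetical):

   #include <stdint.h>

   /* Scalar model of GLSL bitfieldInsert(), including the width == 32,
    * offset == 0 case that emit_bfi() handles before falling back to SM5 BFI. */
   static uint32_t bfi_glsl(uint32_t base, uint32_t insert,
                            uint32_t offset, uint32_t width)
   {
      if (width == 32 && offset == 0)
         return insert;                             /* GLSL: insert replaces dst */
      uint32_t mask = ((width < 32) ? ((1u << width) - 1u) : ~0u) << offset;
      return (base & ~mask) | ((insert << offset) & mask);
   }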
+
+/**
* We only special case the MOV instruction to try to detect constant
* color writes in the fragment shader.
*/
@@ -5804,6 +9009,56 @@ emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
/**
+ * Emit a vmware specific VGPU10 instruction.
+ */
+static boolean
+emit_vmware(struct svga_shader_emitter_v10 *emit,
+ const struct tgsi_full_instruction *inst,
+ VGPU10_VMWARE_OPCODE_TYPE subopcode)
+{
+ VGPU10OpcodeToken0 token0;
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
+ const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
+ const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
+ const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
+
+ unsigned i;
+
+ begin_emit_instruction(emit);
+
+ assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
+
+ token0.value = 0;
+ token0.opcodeType = VGPU10_OPCODE_VMWARE;
+ token0.vmwareOpcodeType = subopcode;
+ emit_dword(emit, token0.value);
+
+ if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
+ /* IDIV only uses the first dest register. */
+ emit_dst_register(emit, &inst->Dst[0]);
+ emit_null_dst_register(emit);
+ } else {
+ for (i = 0; i < op->num_dst; i++) {
+ if (dbl_dst) {
+ check_double_dst_writemask(inst);
+ }
+ emit_dst_register(emit, &inst->Dst[i]);
+ }
+ }
+
+ for (i = 0; i < op->num_src; i++) {
+ if (dbl_src) {
+ check_double_src_swizzle(&inst->Src[i]);
+ }
+ emit_src_register(emit, &inst->Src[i]);
+ }
+ end_emit_instruction(emit);
+
+ return TRUE;
+}
+
+
+/**
* Translate a single TGSI instruction to VGPU10.
*/
static boolean
@@ -5813,6 +9068,9 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
{
const enum tgsi_opcode opcode = inst->Instruction.Opcode;
+ if (emit->skip_instruction)
+ return TRUE;
+
switch (opcode) {
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_AND:
@@ -5852,7 +9110,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_NOP:
case TGSI_OPCODE_NOT:
case TGSI_OPCODE_OR:
- case TGSI_OPCODE_RET:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_USGE:
@@ -5869,9 +9126,41 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_USHR:
case TGSI_OPCODE_USNE:
case TGSI_OPCODE_XOR:
+ /* Begin SM5 opcodes */
+ case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_BREV:
+ case TGSI_OPCODE_POPC:
+ case TGSI_OPCODE_LSB:
+ case TGSI_OPCODE_INTERP_CENTROID:
+ case TGSI_OPCODE_INTERP_SAMPLE:
/* simple instructions */
return emit_simple(emit, inst);
+ case TGSI_OPCODE_RET:
+ if (emit->unit == PIPE_SHADER_TESS_CTRL &&
+ !emit->tcs.control_point_phase) {
+
+ /* store the tessellation levels in the patch constant phase only */
+ store_tesslevels(emit);
+ }
+ return emit_simple(emit, inst);
+ case TGSI_OPCODE_IMSB:
+ case TGSI_OPCODE_UMSB:
+ return emit_msb(emit, inst);
+ case TGSI_OPCODE_IBFE:
+ case TGSI_OPCODE_UBFE:
+ return emit_bfe(emit, inst);
+ case TGSI_OPCODE_BFI:
+ return emit_bfi(emit, inst);
case TGSI_OPCODE_MOV:
return emit_mov(emit, inst);
case TGSI_OPCODE_EMIT:
@@ -5900,7 +9189,7 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_EXP:
return emit_exp(emit, inst);
case TGSI_OPCODE_IF:
- return emit_if(emit, inst);
+ return emit_if(emit, &inst->Src[0]);
case TGSI_OPCODE_KILL:
return emit_kill(emit, inst);
case TGSI_OPCODE_KILL_IF:
@@ -5962,18 +9251,90 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
case TGSI_OPCODE_TXQ:
return emit_txq(emit, inst);
case TGSI_OPCODE_UIF:
- return emit_if(emit, inst);
+ return emit_if(emit, &inst->Src[0]);
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UDIV:
- case TGSI_OPCODE_IDIV:
/* These cases use only the FIRST of two destination registers */
return emit_simple_1dst(emit, inst, 2, 0);
+ case TGSI_OPCODE_IDIV:
+ return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
case TGSI_OPCODE_UMUL:
case TGSI_OPCODE_UMOD:
case TGSI_OPCODE_MOD:
/* These cases use only the SECOND of two destination registers */
return emit_simple_1dst(emit, inst, 2, 1);
+
+ /* Begin SM5 opcodes */
+ case TGSI_OPCODE_DABS:
+ return emit_dabs(emit, inst);
+ case TGSI_OPCODE_DNEG:
+ return emit_dneg(emit, inst);
+ case TGSI_OPCODE_DRCP:
+ return emit_simple(emit, inst);
+ case TGSI_OPCODE_DSQRT:
+ return emit_dsqrt(emit, inst);
+ case TGSI_OPCODE_DMAD:
+ return emit_dmad(emit, inst);
+ case TGSI_OPCODE_DFRAC:
+ return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_D2U:
+ return emit_simple(emit, inst);
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ return emit_simple(emit, inst);
+ case TGSI_OPCODE_DRSQ:
+ return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
+ case TGSI_OPCODE_DDIV:
+ return emit_simple(emit, inst);
+ case TGSI_OPCODE_INTERP_OFFSET:
+ return emit_interp_offset(emit, inst);
+
+ /* The following opcodes should never be seen here. We return zero
+ * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
+ * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
+ */
+ case TGSI_OPCODE_FMA:
+ case TGSI_OPCODE_LDEXP:
+ case TGSI_OPCODE_DSSG:
+ case TGSI_OPCODE_DFRACEXP:
+ case TGSI_OPCODE_DLDEXP:
+ case TGSI_OPCODE_DTRUNC:
+ case TGSI_OPCODE_DCEIL:
+ case TGSI_OPCODE_DFLR:
+ debug_printf("Unexpected TGSI opcode %s. "
+ "Should have been translated away by the GLSL compiler.\n",
+ tgsi_get_opcode_name(opcode));
+ return FALSE;
+
+ case TGSI_OPCODE_LOAD:
+ case TGSI_OPCODE_STORE:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMXOR:
+ return FALSE;
+ case TGSI_OPCODE_BARRIER:
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ /* The SM5 device doesn't support BARRIER in the TCS. If a barrier is
+ * used in the shader, do nothing for this opcode and continue with the
+ * rest of the shader translation.
+ */
+ pipe_debug_message(&emit->svga_debug_callback, INFO,
+ "barrier instruction is not supported in tessellation control shader\n");
+ return TRUE;
+ }
+ else {
+ return emit_simple(emit, inst);
+ }
+
case TGSI_OPCODE_END:
if (!emit_post_helpers(emit))
return FALSE;
@@ -5998,11 +9359,11 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
* \param vs_pos_tmp_index which temporary register contains the vertex pos.
*/
static void
-emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
- unsigned vs_pos_tmp_index)
+emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
{
struct tgsi_full_src_register tmp_pos_src;
struct tgsi_full_dst_register pos_dst;
+ const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
/* Don't bother to emit any extra vertex instructions if vertex position is
* not written out
@@ -6010,6 +9371,12 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
if (emit->vposition.out_index == INVALID_INDEX)
return;
+ /**
+ * Reset the temporary vertex position register index
+ * so that emit_dst_register() will use the real vertex position output
+ */
+ emit->vposition.tmp_index = INVALID_INDEX;
+
tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
pos_dst = make_dst_output_reg(emit->vposition.out_index);
@@ -6023,8 +9390,7 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
make_dst_output_reg(emit->vposition.so_index);
/* MOV pos_so, tmp_pos */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
- &tmp_pos_src, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
}
if (emit->vposition.need_prescale) {
@@ -6045,17 +9411,17 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
struct tgsi_full_src_register prescale_scale =
- make_src_const_reg(emit->vposition.prescale_scale_index);
+ make_src_temp_reg(emit->vposition.prescale_scale_index);
struct tgsi_full_src_register prescale_trans =
- make_src_const_reg(emit->vposition.prescale_trans_index);
+ make_src_temp_reg(emit->vposition.prescale_trans_index);
/* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
- &tmp_pos_src, &prescale_scale, FALSE);
+ &tmp_pos_src, &prescale_scale);
/* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
- &prescale_trans, &tmp_pos_src, FALSE);
+ &prescale_trans, &tmp_pos_src);
}
else if (emit->key.vs.undo_viewport) {
/* This code computes the final vertex position from the temporary
@@ -6090,19 +9456,18 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
/* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
- &tmp_pos_src, &vp_zwww, FALSE);
+ &tmp_pos_src, &vp_zwww);
/* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
- &tmp_pos_src, &vp_xyzw, FALSE);
+ &tmp_pos_src, &vp_xyzw);
/* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
- &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
+ &tmp_pos_src, &tmp_pos_src_wwww);
/* MOV pos.w, tmp_pos.w */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
- &tmp_pos_src, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
}
else if (vs_pos_tmp_index != INVALID_INDEX) {
/* This code is to handle the case where the temporary vertex
@@ -6120,6 +9485,11 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
emit_src_register(emit, &tmp_pos_src);
end_emit_instruction(emit);
}
+
+ /* Restore original vposition.tmp_index value for the next GS vertex.
+ * It doesn't matter for VS.
+ */
+ emit->vposition.tmp_index = vs_pos_tmp_index;
}
static void
@@ -6129,7 +9499,8 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
/* Copy from copy distance temporary to CLIPDIST & the shadow copy */
emit_clip_distance_instructions(emit);
- } else if (emit->clip_mode == CLIP_VERTEX) {
+ } else if (emit->clip_mode == CLIP_VERTEX &&
+ emit->key.last_vertex_stage) {
/* Convert TGSI CLIPVERTEX to CLIPDIST */
emit_clip_vertex_instructions(emit);
}
@@ -6150,7 +9521,7 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
* emit_vpos_instructions() call since the later function will change
* the TEMP[vs_pos_tmp_index] value.
*/
- if (emit->clip_mode == CLIP_LEGACY) {
+ if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
/* Emit CLIPDIST for legacy user defined clip planes */
emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
}
@@ -6165,26 +9536,14 @@ emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
- const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
-
/* Emit clipping instructions based on clipping mode */
emit_clipping_instructions(emit);
- /**
- * Reset the temporary vertex position register index
- * so that emit_dst_register() will use the real vertex position output
- */
- emit->vposition.tmp_index = INVALID_INDEX;
-
/* Emit vertex position instructions */
- emit_vpos_instructions(emit, vs_pos_tmp_index);
-
- /* Restore original vposition.tmp_index value for the next GS vertex.
- * It doesn't matter for VS.
- */
- emit->vposition.tmp_index = vs_pos_tmp_index;
+ emit_vpos_instructions(emit);
}
+
/**
* Translate the TGSI_OPCODE_EMIT GS instruction.
*/
@@ -6196,13 +9555,66 @@ emit_vertex(struct svga_shader_emitter_v10 *emit,
assert(emit->unit == PIPE_SHADER_GEOMETRY);
- emit_vertex_instructions(emit);
+ /**
+ * Emit the viewport array index for the first vertex.
+ */
+ if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
+ struct tgsi_full_dst_register viewport_index_out =
+ make_dst_output_reg(emit->gs.viewport_index_out_index);
+ struct tgsi_full_dst_register viewport_index_out_x =
+ writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
+ struct tgsi_full_src_register viewport_index_tmp =
+ make_src_temp_reg(emit->gs.viewport_index_tmp_index);
+
+ /* Set the out index to INVALID_INDEX, so it will not
+ * be assigned to a temp again in emit_dst_register, and
+ * the viewport index will not be assigned again in the
+ * subsequent vertices.
+ */
+ emit->gs.viewport_index_out_index = INVALID_INDEX;
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+ &viewport_index_out_x, &viewport_index_tmp);
+ }
- /* We can't use emit_simple() because the TGSI instruction has one
- * operand (vertex stream number) which we must ignore for VGPU10.
+ /**
+ * Find the stream index associated with this emit vertex instruction.
*/
+ assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
+ unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
+
+ /**
+ * According to the ARB_gpu_shader5 spec, the built-in geometry shader
+ * outputs are always associated with vertex stream zero.
+ * So emit the extra vertex instructions for position or clip distance
+ * for stream zero only.
+ */
+ if (streamIndex == 0) {
+ /**
+ * Before emitting vertex instructions, emit the temporaries for
+ * the prescale constants based on the viewport index if needed.
+ */
+ if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
+ emit_temp_prescale_instructions(emit);
+
+ emit_vertex_instructions(emit);
+ }
+
begin_emit_instruction(emit);
- emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+ if (emit->version >= 50) {
+ if (emit->info.num_stream_output_components[streamIndex] == 0) {
+ /**
+ * If there is no output for this stream, discard this instruction.
+ */
+ emit->discard_instruction = TRUE;
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
+ emit_stream_register(emit, streamIndex);
+ }
+ }
+ else {
+ emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
+ }
end_emit_instruction(emit);
return ret;
@@ -6399,11 +9811,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
/* ITOF/UTOF/MOV tmp, input[index] */
if (save_itof_mask & (1 << index)) {
emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
- &tmp_dst, &input_src, FALSE);
+ &tmp_dst, &input_src);
}
else if (save_utof_mask & (1 << index)) {
emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
- &tmp_dst, &input_src, FALSE);
+ &tmp_dst, &input_src);
}
else if (save_puint_to_snorm_mask & (1 << index)) {
emit_puint_to_snorm(emit, &tmp_dst, &input_src);
@@ -6417,7 +9829,7 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
else {
assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &tmp_dst, &input_src, FALSE);
+ &tmp_dst, &input_src);
}
if (save_is_bgra_mask & (1 << index)) {
@@ -6428,11 +9840,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
/* MOV tmp.w, 1.0 */
if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &tmp_dst_w, &one_int, FALSE);
+ &tmp_dst_w, &one_int);
}
else {
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &tmp_dst_w, &one, FALSE);
+ &tmp_dst_w, &one);
}
}
}
@@ -6448,58 +9860,281 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
}
-/**
- * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
- * to implement some instructions. We pre-allocate those values here
- * in the immediate constant buffer.
- */
+/* Find zero-value immediate for default layer index */
static void
-alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
+emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
{
- unsigned n = 0;
+ assert(emit->unit == PIPE_SHADER_FRAGMENT);
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
+ /* immediate for default layer index 0 */
+ if (emit->fs.layer_input_index != INVALID_INDEX) {
+ union tgsi_immediate_data imm;
+ imm.Int = 0;
+ emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
+ }
+}
- if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
+
+static void
+emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
+ unsigned cbuf_index,
+ struct tgsi_full_dst_register *scale,
+ struct tgsi_full_dst_register *translate)
+{
+ struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
+ struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
+}
+
+
+/**
+ * A recursive helper function to find the prescale from the constant buffer
+ */
+static void
+find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
+ unsigned index, unsigned num_prescale,
+ struct tgsi_full_src_register *vp_index,
+ struct tgsi_full_dst_register *scale,
+ struct tgsi_full_dst_register *translate,
+ struct tgsi_full_src_register *tmp_src,
+ struct tgsi_full_dst_register *tmp_dst)
+{
+ if (num_prescale == 0)
+ return;
+
+ if (index > 0) {
+ /* ELSE */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
}
- emit->common_immediate_pos[n++] =
- alloc_immediate_int4(emit, 0, 1, 0, -1);
+ struct tgsi_full_src_register index_src =
+ make_immediate_reg_int(emit, index);
- if (emit->key.vs.attrib_puint_to_snorm) {
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
+ if (index == 0) {
+ /* GE tmp, vp_index, index */
+ emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
+ vp_index, &index_src);
+ } else {
+ /* EQ tmp, vp_index, index */
+ emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
+ vp_index, &index_src);
}
- if (emit->key.vs.attrib_puint_to_uscaled) {
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
+ /* IF tmp */
+ emit_if(emit, tmp_src);
+ emit_temp_prescale_from_cbuf(emit,
+ emit->vposition.prescale_cbuf_index + 2 * index,
+ scale, translate);
+
+ find_prescale_from_cbuf(emit, index+1, num_prescale-1,
+ vp_index, scale, translate,
+ tmp_src, tmp_dst);
+
+ /* ENDIF */
+ emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
+}
+
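Because the recursion above unrolls into nested IF/ELSE blocks, the emitted shader code effectively performs a first-match scan over the viewport indices. A C sketch of that control flow (not driver code; it assumes the constant-buffer layout used by emit_temp_prescale_from_cbuf(), with scale at slot 2*i and translate at slot 2*i+1):

   #include <string.h>

   /* Model of the unrolled viewport selection emitted by
    * find_prescale_from_cbuf(): entry 0 is tested with GE, later entries
    * with EQ, and the first hit loads its scale/translate pair. */
   static void select_prescale(int vp_index, unsigned num_prescale,
                               const float cbuf[][4],
                               float scale[4], float translate[4])
   {
      for (unsigned i = 0; i < num_prescale; i++) {
         int hit = (i == 0) ? (vp_index >= (int)i) : (vp_index == (int)i);
         if (hit) {
            memcpy(scale,     cbuf[2 * i],     4 * sizeof(float));
            memcpy(translate, cbuf[2 * i + 1], 4 * sizeof(float));
            return;  /* corresponds to falling through the nested ELSE/ENDIFs */
         }
      }
   }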
+
+/**
+ * This helper function emits instructions to set the prescale
+ * and translate temporaries to the correct constants from the
+ * constant buffer according to the designated viewport.
+ */
+static void
+emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ struct tgsi_full_dst_register prescale_scale =
+ make_dst_temp_reg(emit->vposition.prescale_scale_index);
+ struct tgsi_full_dst_register prescale_translate =
+ make_dst_temp_reg(emit->vposition.prescale_trans_index);
+
+ unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
+
+ if (emit->vposition.num_prescale == 1) {
+ emit_temp_prescale_from_cbuf(emit,
+ prescale_cbuf_index,
+ &prescale_scale, &prescale_translate);
+ } else {
+ /**
+ * Since the SM5 device does not support dynamic indexing, we need
+ * an if-else chain to find the prescale constants for the
+ * specified viewport.
+ */
+ struct tgsi_full_src_register vp_index_src =
+ make_src_temp_reg(emit->gs.viewport_index_tmp_index);
+
+ struct tgsi_full_src_register vp_index_src_x =
+ scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
+
+ unsigned tmp = get_temp_index(emit);
+ struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
+ struct tgsi_full_src_register tmp_src_x =
+ scalar_src(&tmp_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
+
+ find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
+ &vp_index_src_x,
+ &prescale_scale, &prescale_translate,
+ &tmp_src_x, &tmp_dst);
}
- if (emit->key.vs.attrib_puint_to_sscaled) {
- emit->common_immediate_pos[n++] =
- alloc_immediate_int4(emit, 22, 12, 2, 0);
+ /* Mark that the prescale temporaries have been emitted */
+ emit->vposition.have_prescale = 1;
+}
- emit->common_immediate_pos[n++] =
- alloc_immediate_int4(emit, 22, 30, 0, 0);
+
+/**
+ * A hull shader must have control point outputs, but a tessellation
+ * control shader can return without writing to a control point output.
+ * In this case, the control point output is assumed to be a passthrough
+ * of the control point input.
+ * This helper function writes out a control point output up front in case
+ * the tessellation control shader returns before writing one.
+ */
+static void
+emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
+{
+ assert(emit->unit == PIPE_SHADER_TESS_CTRL);
+ assert(emit->tcs.control_point_phase);
+ assert(emit->tcs.control_point_input_index != INVALID_INDEX);
+ assert(emit->tcs.control_point_out_index != INVALID_INDEX);
+ assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
+
+ /* UARL ADDR[INDEX].x INVOCATION.xxxx */
+
+ struct tgsi_full_src_register invocation_src;
+ struct tgsi_full_dst_register addr_dst;
+ struct tgsi_full_dst_register addr_dst_x;
+ unsigned addr_tmp;
+
+ addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
+ addr_dst = make_dst_temp_reg(addr_tmp);
+ addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
+
+ invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
+ emit->tcs.invocation_id_sys_index);
+
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
+ emit_dst_register(emit, &addr_dst_x);
+ emit_src_register(emit, &invocation_src);
+ end_emit_instruction(emit);
+
+
+ /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
+
+ struct tgsi_full_src_register input_control_point;
+ struct tgsi_full_dst_register output_control_point;
+
+ input_control_point = make_src_reg(TGSI_FILE_INPUT,
+ emit->tcs.control_point_input_index);
+ input_control_point.Register.Dimension = 1;
+ input_control_point.Dimension.Indirect = 1;
+ input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
+ input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
+ output_control_point =
+ make_dst_output_reg(emit->tcs.control_point_out_index);
+
+ begin_emit_instruction(emit);
+ emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
+ emit_dst_register(emit, &output_control_point);
+ emit_src_register(emit, &input_control_point);
+ end_emit_instruction(emit);
+}
+
+/**
+ * This function constructs temporary tessfactor vectors from the
+ * VGPU10*_TESSFACTOR values in the domain shader. SM5 exposes tessfactors
+ * as scalar floating-point values, whereas TGSI emits them as vectors.
+ * The temporaries built here mimic TGSI_SEMANTIC_TESSINNER/OUTER, filled
+ * with the values from VGPU10*_TESSFACTOR, and are used wherever
+ * TGSI_SEMANTIC_TESSINNER/OUTER is referenced in the shader.
+ */
+static void
+emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
+{
+ struct tgsi_full_src_register src;
+ struct tgsi_full_dst_register dst;
+
+ if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
+ dst = make_dst_temp_reg(emit->tes.inner.temp_index);
+
+ switch (emit->tes.prim_mode) {
+ case PIPE_PRIM_QUADS:
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ case PIPE_PRIM_TRIANGLES:
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.inner.in_index, TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ break;
+ case PIPE_PRIM_LINES:
+ /**
+ * As per the SM5 spec, InsideTessFactor is unused for isolines.
+ * The GLSL tessInnerLevel for isolines is likewise meaningless, but if
+ * an application tries to read tessInnerLevel in the TES when the
+ * primitive type is isolines, return vec(1.0f) instead of letting the
+ * driver fault on the access.
+ */
+ src = make_immediate_reg_float(emit, 1.0f);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ break;
+ default:
+ break;
+ }
}
- unsigned i;
+ if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
+ dst = make_dst_temp_reg(emit->tes.outer.temp_index);
+
+ switch (emit->tes.prim_mode) {
+ case PIPE_PRIM_QUADS:
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ case PIPE_PRIM_TRIANGLES:
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ case PIPE_PRIM_LINES:
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+
+ src = make_src_scalar_reg(TGSI_FILE_INPUT,
+ emit->tes.outer.in_index , TGSI_SWIZZLE_X);
+ dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- if (emit->key.tex[i].texel_bias) {
- /* Replace 0.0f if more immediate float value is needed */
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
+ break;
+ default:
break;
}
}
+}
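The fall-through switches above pack the scalar SM5 tessfactor inputs back into TGSI-style vectors. A compact C model of the outer-factor packing, as a sketch only (the prim_mode enum and the in[] layout are illustrative stand-ins, not driver definitions):

   enum prim_mode { PRIM_LINES, PRIM_TRIANGLES, PRIM_QUADS };

   /* in[] holds the scalar outer tessfactors in declaration order
    * (in_index .. in_index + 3); outer[] models TGSI_SEMANTIC_TESSOUTER. */
   static void pack_outer_tessfactors(enum prim_mode mode, const float in[4],
                                      float outer[4])
   {
      switch (mode) {
      case PRIM_QUADS:
         outer[3] = in[3];          /* fall through */
      case PRIM_TRIANGLES:
         outer[2] = in[2];          /* fall through */
      case PRIM_LINES:
         outer[1] = in[1];
         outer[0] = in[0];
         break;
      }
   }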
- assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
- emit->num_common_immediates = n;
+
+static void
+emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
+{
+ struct tgsi_full_src_register src;
+ struct tgsi_full_dst_register dst;
+ unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
+ emit->initialize_temp_index);
+ src = make_immediate_reg_float(emit, 0.0f);
+ dst = make_dst_temp_reg(vgpu10_temp_index);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
+ emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
+ emit->initialize_temp_index = INVALID_INDEX;
}
@@ -6513,6 +10148,25 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
/* Properties */
if (emit->unit == PIPE_SHADER_GEOMETRY)
emit_property_instructions(emit);
+ else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ emit_hull_shader_declarations(emit);
+
+ /* Save the position of the first instruction token so that we can
+ * do a second pass of the instructions for the patch constant phase.
+ */
+ emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
+
+ if (!emit_hull_shader_control_point_phase(emit)) {
+ emit->skip_instruction = TRUE;
+ return TRUE;
+ }
+
+ /* Set the current tcs phase to control point phase */
+ emit->tcs.control_point_phase = TRUE;
+ }
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ emit_domain_shader_declarations(emit);
+ }
/* Declare inputs */
if (!emit_input_declarations(emit))
@@ -6525,20 +10179,30 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
/* Declare temporary registers */
emit_temporaries_declaration(emit);
- /* Declare constant registers */
- emit_constant_declaration(emit);
+ /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
+ * will already be declared in hs_decls (emit_hull_shader_declarations)
+ */
+ if (emit->unit != PIPE_SHADER_TESS_CTRL) {
+ /* Declare constant registers */
+ emit_constant_declaration(emit);
- /* Declare samplers and resources */
- emit_sampler_declarations(emit);
- emit_resource_declarations(emit);
+ /* Declare samplers and resources */
+ emit_sampler_declarations(emit);
+ emit_resource_declarations(emit);
- /* Declare clip distance output registers */
- if (emit->unit == PIPE_SHADER_VERTEX ||
- emit->unit == PIPE_SHADER_GEOMETRY) {
- emit_clip_distance_declarations(emit);
+ alloc_common_immediates(emit);
+ /* Now, emit the constant block containing all the immediates
+ * declared by shader, as well as the extra ones seen above.
+ */
}
- alloc_common_immediates(emit);
+ if (emit->unit != PIPE_SHADER_FRAGMENT) {
+ /*
+ * Declare clip distance output registers for ClipVertex or
+ * user defined planes
+ */
+ emit_clip_distance_declarations(emit);
+ }
if (emit->unit == PIPE_SHADER_FRAGMENT &&
emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
@@ -6547,19 +10211,36 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
}
- /* Now, emit the constant block containing all the immediates
- * declared by shader, as well as the extra ones seen above.
- */
- emit_vgpu10_immediates_block(emit);
+ if (emit->unit != PIPE_SHADER_TESS_CTRL) {
+ /**
+ * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
+ * hs_decls
+ */
+ emit_vgpu10_immediates_block(emit);
+ }
+ else {
+ emit_tcs_default_control_point_output(emit);
+ }
if (emit->unit == PIPE_SHADER_FRAGMENT) {
emit_frontface_instructions(emit);
emit_fragcoord_instructions(emit);
emit_sample_position_instructions(emit);
+ emit_default_layer_instructions(emit);
}
else if (emit->unit == PIPE_SHADER_VERTEX) {
emit_vertex_attrib_instructions(emit);
}
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ emit_temp_tessfactor_instructions(emit);
+ }
+
+ /**
+ * For a geometry shader that writes to the viewport index, the prescale
+ * temporaries will be set up at the first vertex emission.
+ */
+ if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
+ emit_temp_prescale_instructions(emit);
return TRUE;
}
@@ -6601,7 +10282,7 @@ emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
}
}
@@ -6646,8 +10327,7 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
*/
if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
/* MOV output.color, tempcolor */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
- &color_src, FALSE); /* XXX saturate? */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
}
free_temp_indexes(emit);
@@ -6694,8 +10374,7 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
/* MOV output.color[i], tempcolor */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
- &color_src, FALSE); /* XXX saturate? */
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
}
}
@@ -6734,6 +10413,18 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit)
emit_broadcast_color_instructions(emit, fs_color_tmp_index);
}
}
+ else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ if (!emit->tcs.control_point_phase) {
+ /* store the tessellation levels in the patch constant phase only */
+ store_tesslevels(emit);
+ }
+ else {
+ emit_clipping_instructions(emit);
+ }
+ }
+ else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
+ emit_vertex_instructions(emit);
+ }
return TRUE;
}
@@ -6754,6 +10445,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
tgsi_parse_init(&parse, tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
+
+ /* Save the current tgsi token starting position */
+ emit->cur_tgsi_token = parse.Position;
+
tgsi_parse_token(&parse);
switch (parse.FullToken.Token.Type) {
@@ -6778,6 +10473,24 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
}
ret = emit_vgpu10_instruction(emit, inst_number++,
&parse.FullToken.FullInstruction);
+
+ /* Usually this applies to the TCS only. If the shader reads control
+ * point outputs in the control point phase, we reemit all instructions
+ * that write to control point outputs in that phase so the results are
+ * also stored in temporaries.
+ */
+ if (emit->reemit_instruction) {
+ assert(emit->unit == PIPE_SHADER_TESS_CTRL);
+ ret = emit_vgpu10_instruction(emit, inst_number,
+ &parse.FullToken.FullInstruction);
+ }
+ else if (emit->initialize_temp_index != INVALID_INDEX) {
+ emit_initialize_temp_instruction(emit);
+ emit->initialize_temp_index = INVALID_INDEX;
+ ret = emit_vgpu10_instruction(emit, inst_number - 1,
+ &parse.FullToken.FullInstruction);
+ }
+
if (!ret)
goto done;
break;
@@ -6793,6 +10506,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
}
}
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ ret = emit_hull_shader_patch_constant_phase(emit, &parse);
+ }
+
done:
tgsi_parse_free(&parse);
return ret;
@@ -6808,6 +10525,7 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
VGPU10ProgramToken ptoken;
/* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
+ ptoken.value = 0; /* init whole token to zero */
ptoken.majorVersion = emit->version / 10;
ptoken.minorVersion = emit->version % 10;
ptoken.programType = translate_shader_type(emit->unit);
@@ -6817,7 +10535,49 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
/* Second token: total length of shader, in tokens. We can't fill this
* in until we're all done. Emit zero for now.
*/
- return emit_dword(emit, 0);
+ if (!emit_dword(emit, 0))
+ return FALSE;
+
+ if (emit->version >= 50) {
+ VGPU10OpcodeToken0 token;
+
+ if (emit->unit == PIPE_SHADER_TESS_CTRL) {
+ /* For the hull shader, we need to start the declarations phase before
+ * emitting any declarations, including the global flags.
+ */
+ token.value = 0;
+ token.opcodeType = VGPU10_OPCODE_HS_DECLS;
+ begin_emit_instruction(emit);
+ emit_dword(emit, token.value);
+ end_emit_instruction(emit);
+ }
+
+ /* Emit global flags */
+ token.value = 0; /* init whole token to zero */
+ token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
+ token.enableDoublePrecisionFloatOps = 1; /* set bit */
+ token.instructionLength = 1;
+ if (!emit_dword(emit, token.value))
+ return FALSE;
+ }
+
+ if (emit->version >= 40) {
+ VGPU10OpcodeToken0 token;
+
+ /* Reserved for a global flag such as refactoringAllowed.
+ * If the shader does not use the precise qualifier, we will set the
+ * refactoringAllowed global flag; otherwise, we will leave the reserved
+ * token as a NOP.
+ */
+ emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
+ token.value = 0;
+ token.opcodeType = VGPU10_OPCODE_NOP;
+ token.instructionLength = 1;
+ if (!emit_dword(emit, token.value))
+ return FALSE;
+ }
+
+ return TRUE;
}
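For an SM5 hull shader, the code above therefore emits a fixed token-stream prefix before any other declarations. A rough sketch of the resulting dword sequence, assuming each of these declarations occupies a single token (indices are illustrative):

   /* tokens[0]  version / programType      (VGPU10ProgramToken)
    * tokens[1]  total shader length        (patched later by emit_vgpu10_tail)
    * tokens[2]  VGPU10_OPCODE_HS_DECLS                   (hull shaders only)
    * tokens[3]  VGPU10_OPCODE_DCL_GLOBAL_FLAGS, double-precision ops enabled
    * tokens[4]  reserved VGPU10_OPCODE_NOP, rewritten to DCL_GLOBAL_FLAGS with
    *            refactoringAllowed when the shader has no precise qualifier
    */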
@@ -6830,6 +10590,16 @@ emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
tokens = (VGPU10ProgramToken *) emit->buf;
tokens[1].value = emit_get_num_tokens(emit);
+ if (emit->version >= 40 && !emit->uses_precise_qualifier) {
+ /* Replace the reserved token with the RefactoringAllowed global flag */
+ VGPU10OpcodeToken0 *ptoken;
+
+ ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
+ assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
+ ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
+ ptoken->refactoringAllowed = 1;
+ }
+
return TRUE;
}
@@ -6908,6 +10678,97 @@ transform_fs_aapoint(const struct tgsi_token *tokens,
return tokens;
}
+
+/**
+ * A helper function to determine the shader in the previous stage and
+ * then call the linker so that this shader's input mapping matches the
+ * output indices of the shader in the previous stage.
+ */
+static void
+compute_input_mapping(struct svga_context *svga,
+ struct svga_shader_emitter_v10 *emit,
+ enum pipe_shader_type unit)
+{
+ struct svga_shader *prevShader = NULL; /* shader in the previous stage */
+
+ if (unit == PIPE_SHADER_FRAGMENT) {
+ prevShader = svga->curr.gs ?
+ &svga->curr.gs->base : (svga->curr.tes ?
+ &svga->curr.tes->base : &svga->curr.vs->base);
+ } else if (unit == PIPE_SHADER_GEOMETRY) {
+ prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
+ } else if (unit == PIPE_SHADER_TESS_EVAL) {
+ assert(svga->curr.tcs);
+ prevShader = &svga->curr.tcs->base;
+ } else if (unit == PIPE_SHADER_TESS_CTRL) {
+ assert(svga->curr.vs);
+ prevShader = &svga->curr.vs->base;
+ }
+
+ if (prevShader != NULL) {
+ svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
+ }
+ else {
+ /**
+ * Since the vertex shader does not need to go through the linker to
+ * establish the input map, we need to make sure the highest index
+ * of input registers is set properly here.
+ */
+ emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
+ emit->info.file_max[TGSI_FILE_INPUT]);
+ }
+}
+
+
+/**
+ * Copies the shader signature info to the shader variant
+ */
+static void
+copy_shader_signature(struct svga_shader_signature *sgn,
+ struct svga_shader_variant *variant)
+{
+ SVGA3dDXShaderSignatureHeader *header = &sgn->header;
+
+ /* Calculate the signature length */
+ variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
+ (header->numInputSignatures +
+ header->numOutputSignatures +
+ header->numPatchConstantSignatures) *
+ sizeof(SVGA3dDXShaderSignatureEntry);
+
+ /* Allocate buffer for the signature info */
+ variant->signature =
+ (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
+
+ char *sgnBuf = (char *)variant->signature;
+ unsigned sgnLen;
+
+ /* Copy the signature info to the shader variant structure */
+ memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
+ sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
+
+ if (header->numInputSignatures) {
+ sgnLen =
+ header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+ memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
+ sgnBuf += sgnLen;
+ }
+
+ if (header->numOutputSignatures) {
+ sgnLen =
+ header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+ memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
+ sgnBuf += sgnLen;
+ }
+
+ if (header->numPatchConstantSignatures) {
+ sgnLen =
+ header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
+ memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
+ }
+}
+
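The blob copied above is laid out as the header followed by the input, output and patch-constant entry arrays packed back to back, which is what the length computation reflects. A hypothetical reader for that layout (not part of the driver; it assumes the SVGA3dDXShaderSignature types declared in the driver headers):

   /* Walk the blob produced by copy_shader_signature(): header first,
    * then input, output and patch-constant entries back to back. */
   static void
   walk_signature(const SVGA3dDXShaderSignatureHeader *hdr,
                  const SVGA3dDXShaderSignatureEntry **inputs,
                  const SVGA3dDXShaderSignatureEntry **outputs,
                  const SVGA3dDXShaderSignatureEntry **patchConsts)
   {
      /* Entries start immediately after the header. */
      const SVGA3dDXShaderSignatureEntry *e =
         (const SVGA3dDXShaderSignatureEntry *)(hdr + 1);

      *inputs = e;
      e += hdr->numInputSignatures;
      *outputs = e;
      e += hdr->numOutputSignatures;
      *patchConsts = e;
   }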
+
/**
* This is the main entrypoint for the TGSI -> VPGU10 translator.
*/
@@ -6920,12 +10781,15 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
struct svga_shader_variant *variant = NULL;
struct svga_shader_emitter_v10 *emit;
const struct tgsi_token *tokens = shader->tokens;
- struct svga_vertex_shader *vs = svga->curr.vs;
- struct svga_geometry_shader *gs = svga->curr.gs;
+
+ (void) make_immediate_reg_double; /* unused at this time */
assert(unit == PIPE_SHADER_VERTEX ||
unit == PIPE_SHADER_GEOMETRY ||
- unit == PIPE_SHADER_FRAGMENT);
+ unit == PIPE_SHADER_FRAGMENT ||
+ unit == PIPE_SHADER_TESS_CTRL ||
+ unit == PIPE_SHADER_TESS_EVAL ||
+ unit == PIPE_SHADER_COMPUTE);
/* These two flags cannot be used together */
assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
@@ -6939,12 +10803,29 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
goto done;
emit->unit = unit;
- emit->version = svga_have_sm4_1(svga) ? 41 : 40;
+ if (svga_have_sm5(svga)) {
+ emit->version = 50;
+ } else if (svga_have_sm4_1(svga)) {
+ emit->version = 41;
+ } else {
+ emit->version = 40;
+ }
+
+ emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
emit->key = *key;
emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
- emit->key.gs.need_prescale);
+ emit->key.gs.need_prescale ||
+ emit->key.tes.need_prescale);
+
+ /* Determine how many prescale factors in the constant buffer */
+ emit->vposition.num_prescale = 1;
+ if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
+ assert(emit->unit == PIPE_SHADER_GEOMETRY);
+ emit->vposition.num_prescale = emit->key.gs.num_prescale;
+ }
+
emit->vposition.tmp_index = INVALID_INDEX;
emit->vposition.so_index = INVALID_INDEX;
emit->vposition.out_index = INVALID_INDEX;
@@ -6954,13 +10835,60 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->fs.fragcoord_input_index = INVALID_INDEX;
emit->fs.sample_id_sys_index = INVALID_INDEX;
emit->fs.sample_pos_sys_index = INVALID_INDEX;
+ emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
+ emit->fs.layer_input_index = INVALID_INDEX;
+ emit->fs.layer_imm_index = INVALID_INDEX;
emit->gs.prim_id_index = INVALID_INDEX;
+ emit->gs.invocation_id_sys_index = INVALID_INDEX;
+ emit->gs.viewport_index_out_index = INVALID_INDEX;
+ emit->gs.viewport_index_tmp_index = INVALID_INDEX;
+
+ emit->tcs.vertices_per_patch_index = INVALID_INDEX;
+ emit->tcs.invocation_id_sys_index = INVALID_INDEX;
+ emit->tcs.control_point_input_index = INVALID_INDEX;
+ emit->tcs.control_point_addr_index = INVALID_INDEX;
+ emit->tcs.control_point_out_index = INVALID_INDEX;
+ emit->tcs.control_point_tmp_index = INVALID_INDEX;
+ emit->tcs.control_point_out_count = 0;
+ emit->tcs.inner.out_index = INVALID_INDEX;
+ emit->tcs.inner.temp_index = INVALID_INDEX;
+ emit->tcs.inner.tgsi_index = INVALID_INDEX;
+ emit->tcs.outer.out_index = INVALID_INDEX;
+ emit->tcs.outer.temp_index = INVALID_INDEX;
+ emit->tcs.outer.tgsi_index = INVALID_INDEX;
+ emit->tcs.patch_generic_out_count = 0;
+ emit->tcs.patch_generic_out_index = INVALID_INDEX;
+ emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
+ emit->tcs.prim_id_index = INVALID_INDEX;
+
+ emit->tes.tesscoord_sys_index = INVALID_INDEX;
+ emit->tes.inner.in_index = INVALID_INDEX;
+ emit->tes.inner.temp_index = INVALID_INDEX;
+ emit->tes.inner.tgsi_index = INVALID_INDEX;
+ emit->tes.outer.in_index = INVALID_INDEX;
+ emit->tes.outer.temp_index = INVALID_INDEX;
+ emit->tes.outer.tgsi_index = INVALID_INDEX;
+ emit->tes.prim_id_index = INVALID_INDEX;
emit->clip_dist_out_index = INVALID_INDEX;
emit->clip_dist_tmp_index = INVALID_INDEX;
emit->clip_dist_so_index = INVALID_INDEX;
emit->clip_vertex_out_index = INVALID_INDEX;
+ emit->clip_vertex_tmp_index = INVALID_INDEX;
+ emit->svga_debug_callback = svga->debug.callback;
+
+ emit->index_range.start_index = INVALID_INDEX;
+ emit->index_range.count = 0;
+ emit->index_range.required = FALSE;
+ emit->index_range.operandType = VGPU10_NUM_OPERANDS;
+ emit->index_range.dim = 0;
+ emit->index_range.size = 0;
+
+ emit->current_loop_depth = 0;
+
+ emit->initialize_temp_index = INVALID_INDEX;
if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
@@ -7002,34 +10930,21 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->num_outputs = emit->info.num_outputs;
- if (unit == PIPE_SHADER_FRAGMENT) {
- /* Compute FS input remapping to match the output from VS/GS */
- if (gs) {
- svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
- } else {
- assert(vs);
- svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
- }
- } else if (unit == PIPE_SHADER_GEOMETRY) {
- assert(vs);
- svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
- }
-
- /* Since vertex shader does not need to go through the linker to
- * establish the input map, we need to make sure the highest index
- * of input registers is set properly here.
+ /**
+ * Compute input mapping to match the outputs from shader
+ * in the previous stage
*/
- emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
- emit->info.file_max[TGSI_FILE_INPUT]);
+ compute_input_mapping(svga, emit, unit);
determine_clipping_mode(emit);
- if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
+ if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
+ unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
/* if there is stream output declarations associated
* with this shader or the shader writes to ClipDistance
* then reserve extra registers for the non-adjusted vertex position
- * and the ClipDistance shadow copy
+ * and the ClipDistance shadow copy.
*/
emit->vposition.so_index = emit->num_outputs++;
@@ -7073,6 +10988,12 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
variant->shader = shader;
variant->nr_tokens = emit_get_num_tokens(emit);
variant->tokens = (const unsigned *)emit->buf;
+
+ /* Copy shader signature info to the shader variant */
+ if (svga_have_sm5(svga)) {
+ copy_shader_signature(&emit->signature, variant);
+ }
+
emit->buf = NULL; /* buffer is no longer owed by emitter context */
memcpy(&variant->key, key, sizeof(*key));
variant->id = UTIL_BITMASK_INVALID_INDEX;
@@ -7091,23 +11012,38 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
variant->extra_const_start--;
}
- variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
+ if (unit == PIPE_SHADER_FRAGMENT) {
+ struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
- /* If there was exactly one write to a fragment shader output register
- * and it came from a constant buffer, we know all fragments will have
- * the same color (except for blending).
- */
- variant->constant_color_output =
- emit->constant_color_output && emit->num_output_writes == 1;
+ fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
- /** keep track in the variant if flat interpolation is used
- * for any of the varyings.
- */
- variant->uses_flat_interp = emit->uses_flat_interp;
+ /* If there was exactly one write to a fragment shader output register
+ * and it came from a constant buffer, we know all fragments will have
+ * the same color (except for blending).
+ */
+ fs_variant->constant_color_output =
+ emit->constant_color_output && emit->num_output_writes == 1;
+
+ /** keep track in the variant if flat interpolation is used
+ * for any of the varyings.
+ */
+ fs_variant->uses_flat_interp = emit->uses_flat_interp;
- variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+ fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+ }
+ else if (unit == PIPE_SHADER_TESS_EVAL) {
+ struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
+
+ /* Keep track in the tes variant some of the layout parameters.
+ * These parameters will be referenced by the tcs to emit
+ * the necessary declarations for the hull shader.
+ */
+ tes_variant->prim_mode = emit->tes.prim_mode;
+ tes_variant->spacing = emit->tes.spacing;
+ tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
+ tes_variant->point_mode = emit->tes.point_mode;
+ }
- variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
if (tokens != shader->tokens) {
tgsi_free_tokens(tokens);
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 88c1c6c7983..717e56caccf 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -427,7 +427,9 @@ struct svga_winsys_context
uint32 shaderId,
SVGA3dShaderType shaderType,
const uint32 *bytecode,
- uint32 bytecodeLen);
+ uint32 bytecodeLen,
+ const SVGA3dDXShaderSignatureHeader *sgnInfo,
+ uint32 sgnLen);
/**
* Destroy a DX GB shader.
@@ -457,7 +459,13 @@ struct svga_winsys_context
/** For HUD queries */
uint64_t num_commands;
+ uint64_t num_command_buffers;
uint64_t num_draw_commands;
+ uint64_t num_shader_reloc;
+ uint64_t num_surf_reloc;
+
+ /* Whether we are in retry processing */
+ unsigned int in_retry;
};
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 432f9afcd1f..da7506e7797 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -65,6 +65,7 @@
#define VMW_MAX_SURF_MEM_FACTOR 2
+
struct vmw_buffer_relocation
{
struct pb_buffer *buffer;
@@ -701,20 +702,19 @@ vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
uint32 shaderId,
SVGA3dShaderType shaderType,
const uint32 *bytecode,
- uint32 bytecodeLen)
+ uint32 bytecodeLen,
+ const SVGA3dDXShaderSignatureHeader *sgnInfo,
+ uint32 sgnLen)
{
struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
struct vmw_svga_winsys_shader *shader;
- struct svga_winsys_gb_shader *gb_shader =
- vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
- bytecodeLen);
- if (!gb_shader)
+ shader = vmw_svga_shader_create(&vswc->vws->base, shaderType, bytecode,
+ bytecodeLen, sgnInfo, sgnLen);
+ if (!shader)
return NULL;
- shader = vmw_svga_winsys_shader(gb_shader);
shader->shid = shaderId;
-
- return gb_shader;
+ return svga_winsys_shader(shader);
}
/**
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.c b/src/gallium/winsys/svga/drm/vmw_shader.c
index 56ffdd16f79..dbf63c59234 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.c
+++ b/src/gallium/winsys/svga/drm/vmw_shader.c
@@ -28,7 +28,9 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "vmw_context.h"
#include "vmw_shader.h"
+#include "vmw_buffer.h"
#include "vmw_screen.h"
void
@@ -63,3 +65,54 @@ vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
*pdst = src;
}
+
+
+/**
+ * A helper function to create a shader object and upload the shader
+ * bytecode, and the signature if specified, to shader memory.
+ */
+struct vmw_svga_winsys_shader *
+vmw_svga_shader_create(struct svga_winsys_screen *sws,
+ SVGA3dShaderType type,
+ const uint32 *bytecode,
+ uint32 bytecodeLen,
+ const SVGA3dDXShaderSignatureHeader *sgnInfo,
+ uint32 sgnLen)
+{
+ struct vmw_svga_winsys_shader *shader;
+ void *map;
+
+ shader = CALLOC_STRUCT(vmw_svga_winsys_shader);
+ if (!shader)
+ return NULL;
+
+ pipe_reference_init(&shader->refcnt, 1);
+ p_atomic_set(&shader->validated, 0);
+ shader->screen = vmw_winsys_screen(sws);
+ shader->buf = sws->buffer_create(sws, 64,
+ SVGA_BUFFER_USAGE_SHADER,
+ bytecodeLen + sgnLen);
+ if (!shader->buf) {
+ FREE(shader);
+ return NULL;
+ }
+
+ map = sws->buffer_map(sws, shader->buf, PIPE_TRANSFER_WRITE);
+ if (!map) {
+ FREE(shader);
+ return NULL;
+ }
+
+ /* copy the shader bytecode */
+ memcpy(map, bytecode, bytecodeLen);
+
+ /* if shader signature is specified, append it to the bytecode. */
+ if (sgnLen) {
+ assert(sws->have_sm5);
+ map = (char *)map + bytecodeLen;
+ memcpy(map, sgnInfo, sgnLen);
+ }
+ sws->buffer_unmap(sws, shader->buf);
+
+ return shader;
+}
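The buffer created above is sized bytecodeLen + sgnLen because the optional signature blob is appended directly after the bytecode. A sketch of the resulting layout (byte offsets):

   /*   0                          bytecodeLen            bytecodeLen + sgnLen
    *   +---------------------------+------------------------------+
    *   |      shader bytecode      |   signature blob (optional)  |
    *   +---------------------------+------------------------------+
    */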
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.h b/src/gallium/winsys/svga/drm/vmw_shader.h
index ae557bcc8e4..a62a814471d 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.h
+++ b/src/gallium/winsys/svga/drm/vmw_shader.h
@@ -65,4 +65,12 @@ void
vmw_svga_winsys_shader_reference(struct vmw_svga_winsys_shader **pdst,
struct vmw_svga_winsys_shader *src);
+struct vmw_svga_winsys_shader *
+vmw_svga_shader_create(struct svga_winsys_screen *sws,
+ SVGA3dShaderType type,
+ const uint32 *bytecode,
+ uint32 bytecodeLen,
+ const SVGA3dDXShaderSignatureHeader *sgnInfo,
+ uint32 sgnLen);
+
#endif /* VMW_SHADER_H_ */