summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
diff options
context:
space:
mode:
authorNeha Bhende <bhenden@vmware.com>2020-05-26 21:29:50 +0530
committerMarge Bot <eric+marge@anholt.net>2020-06-05 06:36:54 +0000
commitba37d408da30d87b6848d76242d9d797dbef80a0 (patch)
tree87447cb12f6f5b7ca80c69308ae636639842b4c0 /src/gallium/drivers/svga/svga_tgsi_vgpu10.c
parentccb4ea5a43e89fcc93fff98c881639223f1538e5 (diff)
svga: Performance fixes
This is a squash commit of in-house performance fixes and misc bug fixes for GL4.1 support. Performance fixes: * started using system memory for constant buffer to gain 3X performance boost with metro redux Misc bug fixes: * fixed usage of vertexid in shader * added empty control point phase in hull shader for zero output control point * misc shader signature fixes * fixed clip_distance input declaration * clearing the dirty bit for the surface while using direct map if surface is already flushed and there is no pending primitive This patch also uses SVGA_RETRY macro for command retries. Part of it is already used in previous patch. Reviewed-by: Brian Paul <brianp@vmware.com> Reviewed-by: Charmaine Lee <charmainel@vmware.com> Signed-off-by: Neha Bhende <bhenden@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5317>
Diffstat (limited to 'src/gallium/drivers/svga/svga_tgsi_vgpu10.c')
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c251
1 files changed, 219 insertions, 32 deletions
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 6e607cd0616..0c07985519c 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -279,6 +279,10 @@ struct svga_shader_emitter_v10
/* viewport constant */
unsigned viewport_index;
+ unsigned vertex_id_bias_index;
+ unsigned vertex_id_sys_index;
+ unsigned vertex_id_tmp_index;
+
/* temp index of adjusted vertex attributes */
unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
} vs;
@@ -333,7 +337,6 @@ struct svga_shader_emitter_v10
struct {
unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */
unsigned imm_index; /**< immediate for tcs */
- unsigned vertices_out;
unsigned invocation_id_sys_index; /**< invocation id */
unsigned invocation_id_tmp_index;
unsigned instruction_token_pos; /* token pos for the first instruction */
@@ -343,6 +346,7 @@ struct svga_shader_emitter_v10
unsigned control_point_tmp_index; /* control point temporary register */
unsigned control_point_out_count; /* control point output count */
boolean control_point_phase; /* true if in control point phase */
+ boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */
unsigned patch_generic_out_count; /* per-patch generic output count */
unsigned patch_generic_out_index; /* per-patch generic output register index*/
unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/
@@ -408,6 +412,7 @@ struct svga_shader_emitter_v10
/* VS/TCS/TES/GS/FS Linkage info */
struct shader_linkage linkage;
+ struct tgsi_shader_info *prevShaderInfo;
/* Shader signature */
struct svga_shader_signature signature;
@@ -603,7 +608,7 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
(emit->unit == PIPE_SHADER_FRAGMENT &&
index >= VGPU10_MAX_FS_INPUTS) ||
(emit->unit == PIPE_SHADER_TESS_CTRL &&
- index >= VGPU11_MAX_HS_INPUTS) ||
+ index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
(emit->unit == PIPE_SHADER_TESS_EVAL &&
index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
emit->register_overflow = TRUE;
@@ -1445,7 +1450,7 @@ static boolean
need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
unsigned index)
{
if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
&& emit->current_loop_depth == 0) {
if (!emit->temp_map[index].initialized &&
emit->temp_map[index].index < emit->num_shader_temps) {
@@ -1575,10 +1580,18 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
}
}
else if (file == TGSI_FILE_SYSTEM_VALUE) {
- /* Map the TGSI system value to a VGPU10 input register */
- assert(index < ARRAY_SIZE(emit->system_value_indexes));
- file = TGSI_FILE_INPUT;
- index = emit->system_value_indexes[index];
+ if (index == emit->vs.vertex_id_sys_index &&
+ emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
+ file = TGSI_FILE_TEMPORARY;
+ index = emit->vs.vertex_id_tmp_index;
+ swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
+ }
+ else {
+ /* Map the TGSI system value to a VGPU10 input register */
+ assert(index < ARRAY_SIZE(emit->system_value_indexes));
+ file = TGSI_FILE_INPUT;
+ index = emit->system_value_indexes[index];
+ }
}
}
else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
@@ -1600,7 +1613,10 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
*/
operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
- index = 0;
+ operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
+ operand0.mask = 0;
+ emit_dword(emit, operand0.value);
+ return;
}
else {
/* There is no control point ID input declaration in
@@ -1718,6 +1734,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
* to align with the tcs output index.
*/
index = emit->linkage.input_map[index];
+
+ assert(index2 < emit->key.tes.vertices_per_patch);
}
else {
if (index < emit->key.tes.tessfactor_index)
@@ -2824,7 +2842,7 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
break;
case TGSI_PROPERTY_TCS_VERTICES_OUT:
- emit->tcs.vertices_out = prop->u[0].Data;
+ /* This info is already captured in the shader key */
break;
case TGSI_PROPERTY_TES_PRIM_MODE:
@@ -2935,7 +2953,7 @@ emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
assert(emit->unit == PIPE_SHADER_TESS_EVAL);
/* Emit the input control point count */
- assert(emit->key.tes.vertices_per_patch > 0 &&
+ assert(emit->key.tes.vertices_per_patch >= 0 &&
emit->key.tes.vertices_per_patch <= 32);
opcode0.value = 0;
@@ -3066,11 +3084,11 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
end_emit_instruction(emit);
/* Emit the output control point count */
- assert(emit->tcs.vertices_out >= 0 && emit->tcs.vertices_out <= 32);
+ assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
opcode0.value = 0;
opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
- opcode0.controlPointCount = emit->tcs.vertices_out;
+ opcode0.controlPointCount = emit->key.tcs.vertices_out;
begin_emit_instruction(emit);
emit_dword(emit, opcode0.value);
end_emit_instruction(emit);
@@ -3157,7 +3175,8 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
* we need a control point phase to explicitly set the output control
* points.
*/
- if (emit->key.tcs.vertices_per_patch != emit->tcs.vertices_out)
+ if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
+ emit->key.tcs.vertices_out)
return TRUE;
for (i = 0; i < emit->info.num_outputs; i++) {
@@ -3175,23 +3194,93 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
/**
- * Start the hull shader control point phase
+ * A helper function to add shader signature for passthrough control point
+ * phase. This signature is also generated for passthrough control point
+ * phase from HLSL compiler and is needed by Metal Renderer.
*/
-static boolean
-emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
+static void
+emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
{
- VGPU10OpcodeToken0 opcode0;
+ struct svga_shader_signature *sgn = &emit->signature;
+ SVGA3dDXShaderSignatureEntry *sgnEntry;
+ unsigned i;
- /* If there is no control point output, skip the control point phase. */
- if (!needs_control_point_phase(emit))
- return FALSE;
+ for (i = 0; i < emit->info.num_inputs; i++) {
+ unsigned index = emit->linkage.input_map[i];
+ enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
+
+ sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
+
+ set_shader_signature_entry(sgnEntry, index,
+ tgsi_semantic_to_sgn_name[sem_name],
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+
+ sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
+
+ set_shader_signature_entry(sgnEntry, i,
+ tgsi_semantic_to_sgn_name[sem_name],
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
+ SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
+ }
+}
+
+
+/**
+ * A helper function to emit an instruction to start the control point phase
+ * in the hull shader.
+ */
+static void
+emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
+{
+ VGPU10OpcodeToken0 opcode0;
- /* Start the control point phase in the hull shader */
opcode0.value = 0;
opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
begin_emit_instruction(emit);
emit_dword(emit, opcode0.value);
end_emit_instruction(emit);
+}
+
+
+/**
+ * Start the hull shader control point phase
+ */
+static boolean
+emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
+{
+ /* If there is no control point output, skip the control point phase. */
+ if (!needs_control_point_phase(emit)) {
+ if (!emit->key.tcs.vertices_out) {
+ /**
+ * If the tcs does not explicitly generate any control point output
+ * and the tes does not use any input control point, then
+ * emit an empty control point phase with zero output control
+ * point count.
+ */
+ emit_control_point_phase_instruction(emit);
+
+ /**
+ * Since this is an empty control point phase, we will need to
+ * add input signatures when we parse the tcs again in the
+ * patch constant phase.
+ */
+ emit->tcs.fork_phase_add_signature = TRUE;
+ }
+ else {
+ /**
+ * Before skipping the control point phase, add the signature for
+ * the passthrough control point.
+ */
+ emit_passthrough_control_point_signature(emit);
+ }
+ return FALSE;
+ }
+
+ /* Start the control point phase in the hull shader */
+ emit_control_point_phase_instruction(emit);
/* Declare the output control point ID */
if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
@@ -3799,9 +3888,6 @@ emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
emit->key.fs.write_color0_to_n_cbufs;
}
}
- else {
- assert(!emit->key.fs.write_color0_to_n_cbufs);
- }
}
else if (semantic_name == TGSI_SEMANTIC_POSITION) {
/* Fragment depth output */
@@ -4064,7 +4150,7 @@ emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
SVGA3dDXShaderSignatureEntry *sgnEntry =
&sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
set_shader_signature_entry(sgnEntry, index,
- sgnName, SVGA3DWRITEMASK_0,
+ sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
}
@@ -4324,6 +4410,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
map_tgsi_semantic_to_sgn_name(semantic_name));
break;
case TGSI_SEMANTIC_VERTEXID:
+ emit->vs.vertex_id_sys_index = index;
index = alloc_system_value_index(emit, index);
emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
VGPU10_OPERAND_TYPE_INPUT,
@@ -4786,6 +4873,10 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
unsigned i;
unsigned size = emit->key.tcs.vertices_per_patch;
unsigned indicesMask = 0;
+ boolean addSignature = TRUE;
+
+ if (!emit->tcs.control_point_phase)
+ addSignature = emit->tcs.fork_phase_add_signature;
for (i = 0; i < emit->info.num_inputs; i++) {
unsigned usage_mask = emit->info.input_usage_mask[i];
@@ -4793,7 +4884,8 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
- boolean addSignature = TRUE;
+ SVGA3dDXSignatureSemanticName sgn_name =
+ map_tgsi_semantic_to_sgn_name(semantic_name);
/* indices that are declared */
indicesMask |= 1 << index;
@@ -4806,13 +4898,18 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
else if (usage_mask == 0) {
continue; /* register is not actually used */
}
+ else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
+ /* The shadow copy is being used here. So set the signature name
+ * to UNDEFINED.
+ */
+ sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
+ }
/* input control points in the patch constant phase are emitted in the
* vicp register rather than the v register.
*/
if (!emit->tcs.control_point_phase) {
operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
- addSignature = emit->tcs.control_point_out_count == 0;
}
/* Tessellation control shader inputs are two dimensional.
@@ -4826,9 +4923,7 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
VGPU10_INTERPOLATION_UNDEFINED,
- addSignature,
- map_tgsi_semantic_to_sgn_name(semantic_name));
-
+ addSignature, sgn_name);
}
if (emit->tcs.control_point_phase) {
@@ -4983,6 +5078,54 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
}
emit_tessfactor_input_declarations(emit);
+
+ /* DX spec requires DS input controlpoint/patch-constant signatures to match
+ * the HS output controlpoint/patch-constant signatures exactly.
+ * Add missing input declarations even if they are not used in the shader.
+ */
+ if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
+ struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
+ for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
+
+ /* If a tcs output does not have a corresponding input register in
+ * tes, add one.
+ */
+ if (emit->linkage.prevShader.output_map[i] >
+ emit->linkage.input_map_max) {
+ const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
+
+ if (sem_name == TGSI_SEMANTIC_PATCH) {
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
+ VGPU10_OPERAND_INDEX_1D,
+ i, 1, VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED,
+ TRUE,
+ map_tgsi_semantic_to_sgn_name(sem_name));
+
+ } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
+ sem_name != TGSI_SEMANTIC_TESSOUTER) {
+ emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
+ VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
+ VGPU10_OPERAND_INDEX_2D,
+ i, emit->key.tes.vertices_per_patch,
+ VGPU10_NAME_UNDEFINED,
+ VGPU10_OPERAND_4_COMPONENT,
+ VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+ VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
+ VGPU10_INTERPOLATION_UNDEFINED,
+ TRUE,
+ map_tgsi_semantic_to_sgn_name(sem_name));
+ }
+ /* tessellation factors are taken care of in
+ * emit_tessfactor_input_declarations().
+ */
+ }
+ }
+ }
}
@@ -5088,7 +5231,7 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
VGPU10_NAME_UNDEFINED,
emit->output_usage_mask[emit->clip_dist_out_index],
TRUE,
- SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
if (emit->info.num_written_clipdistance > 4) {
/* for the second clip distance register, each handles 4 planes */
@@ -5097,7 +5240,7 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
VGPU10_NAME_UNDEFINED,
emit->output_usage_mask[emit->clip_dist_out_index+1],
TRUE,
- SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
+ SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
}
}
@@ -5184,6 +5327,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
total_temps++;
}
+ if (emit->info.uses_vertexid) {
+ assert(emit->unit == PIPE_SHADER_VERTEX);
+ emit->vs.vertex_id_tmp_index = total_temps++;
+ }
+
if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
emit->key.clip_plane_enable ||
@@ -5440,6 +5588,9 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
if (emit->key.vs.undo_viewport) {
emit->vs.viewport_index = total_consts++;
}
+ if (emit->key.vs.need_vertex_id_bias) {
+ emit->vs.vertex_id_bias_index = total_consts++;
+ }
}
/* user-defined clip planes */
@@ -9986,6 +10137,33 @@ emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
/**
+ * A helper function to emit an instruction in a vertex shader to add a bias
+ * to the VertexID system value. This patches the VertexID in the SVGA vertex
+ * shader to include the base vertex of an indexed primitive or the start index
+ * of a non-indexed primitive.
+ */
+static void
+emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
+{
+ struct tgsi_full_src_register vertex_id_bias_index =
+ make_src_const_reg(emit->vs.vertex_id_bias_index);
+ struct tgsi_full_src_register vertex_id_sys_src =
+ make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
+ struct tgsi_full_src_register vertex_id_sys_src_x =
+ scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
+ struct tgsi_full_dst_register vertex_id_tmp_dst =
+ make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
+
+ /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
+ unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
+ emit->vs.vertex_id_tmp_index = INVALID_INDEX;
+ emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
+ &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
+ FALSE);
+ emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
+}
+
+/**
* Hull Shader must have control point outputs. But tessellation
* control shader can return without writing to control point output.
* In this case, the control point output is assumed to be passthrough
@@ -10155,6 +10333,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
* do a second pass of the instructions for the patch constant phase.
*/
emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
+ emit->tcs.fork_phase_add_signature = FALSE;
if (!emit_hull_shader_control_point_phase(emit)) {
emit->skip_instruction = TRUE;
@@ -10230,6 +10409,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
}
else if (emit->unit == PIPE_SHADER_VERTEX) {
emit_vertex_attrib_instructions(emit);
+
+ if (emit->info.uses_vertexid)
+ emit_vertex_id_nobase_instruction(emit);
}
else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
emit_temp_tessfactor_instructions(emit);
@@ -10707,6 +10889,7 @@ compute_input_mapping(struct svga_context *svga,
if (prevShader != NULL) {
svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
+ emit->prevShaderInfo = &prevShader->info;
}
else {
/**
@@ -10830,6 +11013,10 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
emit->vposition.so_index = INVALID_INDEX;
emit->vposition.out_index = INVALID_INDEX;
+ emit->vs.vertex_id_sys_index = INVALID_INDEX;
+ emit->vs.vertex_id_tmp_index = INVALID_INDEX;
+ emit->vs.vertex_id_bias_index = INVALID_INDEX;
+
emit->fs.color_tmp_index = INVALID_INDEX;
emit->fs.face_input_index = INVALID_INDEX;
emit->fs.fragcoord_input_index = INVALID_INDEX;