diff options
Diffstat (limited to 'src/gallium/drivers/svga/svga_tgsi_vgpu10.c')
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 3252 |
1 files changed, 2598 insertions, 654 deletions
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 8989a57d2ba..20e316c08ce 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -1,5 +1,5 @@ /********************************************************** - * Copyright 1998-2013 VMware, Inc. All rights reserved. + * Copyright 1998-2022 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -32,10 +32,9 @@ * \author Brian Paul */ -#include "pipe/p_compiler.h" +#include "util/compiler.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_defines.h" -#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -60,13 +59,12 @@ #define INVALID_INDEX 99999 -#define MAX_INTERNAL_TEMPS 3 +#define MAX_INTERNAL_TEMPS 4 #define MAX_SYSTEM_VALUES 4 #define MAX_IMMEDIATE_COUNT \ (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4) #define MAX_TEMP_ARRAYS 64 /* Enough? */ - /** * Clipping is complicated. There's four different cases which we * handle during VS/GS shader translation: @@ -181,6 +179,18 @@ map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) return tgsi_semantic_to_sgn_name[name]; } +enum reemit_mode { + REEMIT_FALSE = 0, + REEMIT_TRUE = 1, + REEMIT_IN_PROGRESS = 2 +}; + +struct svga_raw_buf_tmp { + bool indirect; + unsigned buffer_index:8; + unsigned element_index:8; + unsigned element_rel:8; +}; struct svga_shader_emitter_v10 { @@ -193,20 +203,26 @@ struct svga_shader_emitter_v10 struct svga_compile_key key; struct tgsi_shader_info info; unsigned unit; - unsigned version; /**< Either 40 or 41 at this time */ + unsigned version; /**< Either 40, 41, 50 or 51 at this time */ unsigned cur_tgsi_token; /**< current tgsi token position */ unsigned inst_start_token; - boolean discard_instruction; /**< throw away current instruction? */ - boolean reemit_instruction; /**< reemit current instruction */ - boolean skip_instruction; /**< skip current instruction */ + bool discard_instruction; /**< throw away current instruction? */ + bool reemit_instruction; /**< reemit current instruction */ + bool reemit_tgsi_instruction; /**< reemit current tgsi instruction */ + bool skip_instruction; /**< skip current instruction */ + bool use_sampler_state_mapping; /* use sampler state mapping */ + enum reemit_mode reemit_rawbuf_instruction; union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; double (*immediates_dbl)[2]; unsigned num_immediates; /**< Number of immediates emitted */ - unsigned common_immediate_pos[10]; /**< literals for common immediates */ + unsigned common_immediate_pos[20]; /**< literals for common immediates */ unsigned num_common_immediates; - boolean immediates_emitted; + unsigned num_immediates_emitted; + unsigned num_new_immediates; /** pending immediates to be declared */ + unsigned immediates_block_start_token; + unsigned immediates_block_next_token; unsigned num_outputs; /**< include any extra outputs */ /** The first extra output is reserved for @@ -225,7 +241,7 @@ struct svga_shader_emitter_v10 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ struct { unsigned arrayId, index; - boolean initialized; + bool initialized; } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ unsigned initialize_temp_index; @@ -235,17 +251,43 @@ struct svga_shader_emitter_v10 */ unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; + /* Raw constant buffers */ + unsigned raw_buf_srv_start_index; /* starting srv index for raw buffers */ + unsigned raw_bufs; /* raw buffers bitmask */ + unsigned raw_buf_tmp_index; /* starting temp index for raw buffers */ + unsigned raw_buf_cur_tmp_index; /* current temp index for raw buffers */ + struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */ + /* Samplers */ unsigned num_samplers; - boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ - ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ - ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ + bool sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ + uint8_t sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ + uint8_t sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ + + /* Images */ + unsigned num_images; + unsigned image_mask; + struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES]; + unsigned image_size_index; /* starting index to cbuf for image size */ + + /* Shader buffers */ + unsigned num_shader_bufs; + unsigned raw_shaderbuf_srv_start_index; /* starting srv index for raw shaderbuf */ + uint64_t raw_shaderbufs; /* raw shader buffers bitmask */ + + /* HW atomic buffers */ + unsigned num_atomic_bufs; + unsigned atomic_bufs_mask; + unsigned max_atomic_counter_index; + VGPU10_OPCODE_TYPE cur_atomic_opcode; /* current atomic opcode */ + + bool uav_declared; /* True if uav is declared */ /* Index Range declaration */ struct { unsigned start_index; unsigned count; - boolean required; + bool required; unsigned operandType; unsigned size; unsigned dim; @@ -256,10 +298,10 @@ struct svga_shader_emitter_v10 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; /* Output register usage masks */ - ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; + uint8_t output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; /* To map TGSI system value index to VGPU shader input indexes */ - ubyte system_value_indexes[MAX_SYSTEM_VALUES]; + uint8_t system_value_indexes[MAX_SYSTEM_VALUES]; struct { /* vertex position scale/translation */ @@ -274,6 +316,11 @@ struct svga_shader_emitter_v10 unsigned have_prescale:1; } vposition; + /* Shader limits */ + unsigned max_vs_inputs; + unsigned max_vs_outputs; + unsigned max_gs_inputs; + /* For vertex shaders only */ struct { /* viewport constant */ @@ -299,6 +346,7 @@ struct svga_shader_emitter_v10 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ unsigned pstipple_sampler_unit; + unsigned pstipple_sampler_state_index; unsigned fragcoord_input_index; /**< real fragment position input reg */ unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ @@ -311,12 +359,11 @@ struct svga_shader_emitter_v10 /** TGSI index of sample mask input sys value */ unsigned sample_mask_in_sys_index; - /** Which texture units are doing shadow comparison in the FS code */ - unsigned shadow_compare_units; - /* layer */ unsigned layer_input_index; /**< TGSI index of layer */ unsigned layer_imm_index; /**< immediate for default layer 0 */ + + bool forceEarlyDepthStencil; /**< true if Early Depth stencil test is enabled */ } fs; /* For geometry shaders only */ @@ -345,8 +392,8 @@ struct svga_shader_emitter_v10 unsigned control_point_out_index; /* control point output register index */ unsigned control_point_tmp_index; /* control point temporary register */ unsigned control_point_out_count; /* control point output count */ - boolean control_point_phase; /* true if in control point phase */ - boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */ + bool control_point_phase; /* true if in control point phase */ + bool fork_phase_add_signature; /* true if needs to add signature in fork phase */ unsigned patch_generic_out_count; /* per-patch generic output count */ unsigned patch_generic_out_index; /* per-patch generic output register index*/ unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ @@ -365,11 +412,12 @@ struct svga_shader_emitter_v10 /* For tessellation evaluation shaders only */ struct { - enum pipe_prim_type prim_mode; + enum mesa_prim prim_mode; enum pipe_tess_spacing spacing; - boolean vertices_order_cw; - boolean point_mode; + bool vertices_order_cw; + bool point_mode; unsigned tesscoord_sys_index; + unsigned swizzle_max; unsigned prim_id_index; /* primitive id */ struct { unsigned in_index; /* real tessinner input register */ @@ -383,6 +431,19 @@ struct svga_shader_emitter_v10 } outer; } tes; + struct { + unsigned block_width; /* thread group size in x dimension */ + unsigned block_height; /* thread group size in y dimension */ + unsigned block_depth; /* thread group size in z dimension */ + unsigned thread_id_index; /* thread id tgsi index */ + unsigned block_id_index; /* block id tgsi index */ + bool shared_memory_declared; /* set if shared memory is declared */ + struct { + unsigned tgsi_index; /* grid size tgsi index */ + unsigned imm_index; /* grid size imm index */ + } grid_size; + } cs; + /* For vertex or geometry shaders */ enum clipping_mode clip_mode; unsigned clip_dist_out_index; /**< clip distance output register index */ @@ -397,12 +458,12 @@ struct svga_shader_emitter_v10 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; unsigned num_output_writes; - boolean constant_color_output; + bool constant_color_output; - boolean uses_flat_interp; + bool uses_flat_interp; unsigned reserved_token; /* index to the reserved token */ - boolean uses_precise_qualifier; + bool uses_precise_qualifier; /* For all shaders: const reg index for RECT coord scaling */ unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; @@ -410,6 +471,9 @@ struct svga_shader_emitter_v10 /* For all shaders: const reg index for texture buffer size */ unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; + /** Which texture units are doing shadow comparison in the shader code */ + unsigned shadow_compare_units; + /* VS/TCS/TES/GS/FS Linkage info */ struct shader_linkage linkage; struct tgsi_shader_info *prevShaderInfo; @@ -419,8 +483,8 @@ struct svga_shader_emitter_v10 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ - /* For pipe_debug_message */ - struct pipe_debug_callback svga_debug_callback; + /* For util_debug_message */ + struct util_debug_callback svga_debug_callback; /* current loop depth in shader */ unsigned current_loop_depth; @@ -429,36 +493,48 @@ struct svga_shader_emitter_v10 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit); static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit); -static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); -static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit); -static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); -static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); -static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); -static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); +static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); +static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit); +static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); +static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit); +static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); +static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); +static void emit_image_declarations(struct svga_shader_emitter_v10 *emit); +static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit); +static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit); static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); -static boolean +static bool emit_post_helpers(struct svga_shader_emitter_v10 *emit); -static boolean +static bool emit_vertex(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst); -static boolean +static bool emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, unsigned inst_number, const struct tgsi_full_instruction *inst); static void emit_input_declaration(struct svga_shader_emitter_v10 *emit, - unsigned opcodeType, unsigned operandType, - unsigned dim, unsigned index, unsigned size, - unsigned name, unsigned numComp, - unsigned selMode, unsigned usageMask, - unsigned interpMode, - boolean addSignature, + VGPU10_OPCODE_TYPE opcodeType, + VGPU10_OPERAND_TYPE operandType, + VGPU10_OPERAND_INDEX_DIMENSION dim, + unsigned index, unsigned size, + VGPU10_SYSTEM_NAME name, + VGPU10_OPERAND_NUM_COMPONENTS numComp, + VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, + unsigned usageMask, + VGPU10_INTERPOLATION_MODE interpMode, + bool addSignature, SVGA3dDXSignatureSemanticName sgnName); +static bool +emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst); + static void create_temp_array(struct svga_shader_emitter_v10 *emit, unsigned arrayID, unsigned first, unsigned count, @@ -466,7 +542,7 @@ create_temp_array(struct svga_shader_emitter_v10 *emit, static char err_buf[128]; -static boolean +static bool expand(struct svga_shader_emitter_v10 *emit) { char *new_buf; @@ -481,13 +557,13 @@ expand(struct svga_shader_emitter_v10 *emit) emit->ptr = err_buf; emit->buf = err_buf; emit->size = sizeof(err_buf); - return FALSE; + return false; } emit->size = newsize; emit->ptr = new_buf + (emit->ptr - emit->buf); emit->buf = new_buf; - return TRUE; + return true; } /** @@ -521,40 +597,40 @@ free_emitter(struct svga_shader_emitter_v10 *emit) FREE(emit); } -static inline boolean +static inline bool reserve(struct svga_shader_emitter_v10 *emit, unsigned nr_dwords) { while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { if (!expand(emit)) - return FALSE; + return false; } - return TRUE; + return true; } -static boolean +static bool emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) { if (!reserve(emit, 1)) - return FALSE; + return false; *(uint32 *)emit->ptr = dword; emit->ptr += sizeof dword; - return TRUE; + return true; } -static boolean +static bool emit_dwords(struct svga_shader_emitter_v10 *emit, const uint32 *dwords, unsigned nr) { if (!reserve(emit, nr)) - return FALSE; + return false; memcpy(emit->ptr, dwords, nr * sizeof *dwords); emit->ptr += nr * sizeof *dwords; - return TRUE; + return true; } /** Return the number of tokens in the emitter's buffer */ @@ -584,13 +660,13 @@ check_register_index(struct svga_shader_emitter_v10 *emit, case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: case VGPU10_OPCODE_DCL_TEMPS: if (index >= VGPU10_MAX_TEMPS) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_INPUT: @@ -602,16 +678,16 @@ check_register_index(struct svga_shader_emitter_v10 *emit, case VGPU10_OPCODE_DCL_INPUT_PS_SGV: case VGPU10_OPCODE_DCL_INPUT_PS_SIV: if ((emit->unit == PIPE_SHADER_VERTEX && - index >= VGPU10_MAX_VS_INPUTS) || + index >= emit->max_vs_inputs) || (emit->unit == PIPE_SHADER_GEOMETRY && - index >= VGPU10_MAX_GS_INPUTS) || + index >= emit->max_gs_inputs) || (emit->unit == PIPE_SHADER_FRAGMENT && index >= VGPU10_MAX_FS_INPUTS) || (emit->unit == PIPE_SHADER_TESS_CTRL && index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) || (emit->unit == PIPE_SHADER_TESS_EVAL && index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_OUTPUT: @@ -625,7 +701,7 @@ check_register_index(struct svga_shader_emitter_v10 *emit, * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2 */ if ((emit->unit == PIPE_SHADER_VERTEX && - index >= VGPU10_MAX_VS_OUTPUTS) || + index >= emit->max_vs_outputs) || (emit->unit == PIPE_SHADER_GEOMETRY && index >= VGPU10_MAX_GS_OUTPUTS) || (emit->unit == PIPE_SHADER_FRAGMENT && @@ -634,27 +710,34 @@ check_register_index(struct svga_shader_emitter_v10 *emit, index >= VGPU11_MAX_HS_OUTPUTS + 2) || (emit->unit == PIPE_SHADER_TESS_EVAL && index >= VGPU11_MAX_DS_OUTPUTS)) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_SAMPLER: case VGPU10_OPCODE_DCL_SAMPLER: if (index >= VGPU10_MAX_SAMPLERS) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_RESOURCE: case VGPU10_OPCODE_DCL_RESOURCE: if (index >= VGPU10_MAX_RESOURCES) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: if (index >= MAX_IMMEDIATE_COUNT) { - emit->register_overflow = TRUE; + emit->register_overflow = true; } break; case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT: + case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT: + case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT: + case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: /* nothing */ break; default: @@ -960,6 +1043,10 @@ translate_opcode(enum tgsi_opcode opcode) return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; case TGSI_OPCODE_BARRIER: return VGPU10_OPCODE_SYNC; + case TGSI_OPCODE_DFMA: + return VGPU10_OPCODE_DFMA; + case TGSI_OPCODE_FMA: + return VGPU10_OPCODE_MAD; /* DX11.1 Opcodes */ case TGSI_OPCODE_DDIV: @@ -1000,7 +1087,7 @@ translate_opcode(enum tgsi_opcode opcode) * \param array is the TGSI_FILE_TEMPORARY register an array? */ static VGPU10_OPERAND_TYPE -translate_register_file(enum tgsi_file_type file, boolean array) +translate_register_file(enum tgsi_file_type file, bool array) { switch (file) { case TGSI_FILE_CONSTANT: @@ -1093,8 +1180,8 @@ static VGPU10OperandToken0 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, VGPU10OperandToken0 operand0, enum tgsi_file_type file, - boolean indirect, - boolean index2D, bool indirect2D) + bool indirect, + bool index2D, bool indirect2D) { VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep; VGPU10_OPERAND_INDEX_DIMENSION indexDim; @@ -1195,13 +1282,13 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index]; const unsigned sem_index = emit->info.output_semantic_index[index]; unsigned writemask = reg->Register.WriteMask; - const boolean indirect = reg->Register.Indirect; + const bool indirect = reg->Register.Indirect; unsigned tempArrayId = get_temp_array_id(emit, file, index); - boolean index2d = reg->Register.Dimension || tempArrayId > 0; + bool index2d = reg->Register.Dimension || tempArrayId > 0; VGPU10OperandToken0 operand0; if (file == TGSI_FILE_TEMPORARY) { - emit->temp_map[index].initialized = TRUE; + emit->temp_map[index].initialized = true; } if (file == TGSI_FILE_OUTPUT) { @@ -1243,7 +1330,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, */ VGPU10OpcodeToken0 *token = (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token; - token->saturate = TRUE; + token->saturate = true; } else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX && emit->gs.viewport_index_out_index != INVALID_INDEX) { @@ -1300,7 +1387,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, */ if (emit->tcs.control_point_phase) { /* Discard writing into tessfactor in control point phase */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { file = TGSI_FILE_TEMPORARY; @@ -1314,7 +1401,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, */ if (emit->tcs.control_point_phase) { /* Discard writing into tessfactor in control point phase */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { file = TGSI_FILE_TEMPORARY; @@ -1327,7 +1414,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, if (emit->tcs.control_point_phase) { /* Discard writing into generic patch constant outputs in control point phase */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { if (emit->reemit_instruction) { @@ -1342,14 +1429,14 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, tempArrayId = get_temp_array_id(emit, file, index); index2d = tempArrayId > 0; - emit->reemit_instruction = FALSE; + emit->reemit_instruction = false; } else { /* If per-patch outputs is been read in shader, we * reemit instruction and store results in temporaries in * patch constant phase. */ if (emit->info.reads_perpatch_outputs) { - emit->reemit_instruction = TRUE; + emit->reemit_instruction = true; } } } @@ -1359,20 +1446,20 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, if (emit->tcs.control_point_phase) { if (emit->reemit_instruction) { /* Store results of reemitted instruction in temporary register. */ - index2d = FALSE; + index2d = false; file = TGSI_FILE_TEMPORARY; index = emit->tcs.control_point_tmp_index + (index - emit->tcs.control_point_out_index); - emit->reemit_instruction = FALSE; + emit->reemit_instruction = false; } else { /* The mapped control point outputs are 1-D */ - index2d = FALSE; + index2d = false; if (emit->info.reads_pervertex_outputs) { /* If per-vertex outputs is been read in shader, we * reemit instruction and store results in temporaries * control point phase. */ - emit->reemit_instruction = TRUE; + emit->reemit_instruction = true; } } @@ -1398,7 +1485,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, else { /* Discard writing into control point outputs in patch constant phase */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } } } @@ -1424,7 +1511,7 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, check_register_index(emit, operand0.operandType, index); operand0 = setup_operand0_indexing(emit, operand0, file, indirect, - index2d, FALSE); + index2d, false); /* Emit tokens */ emit_dword(emit, operand0.value); @@ -1446,19 +1533,19 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit, * temporary is not used in loop. In these two scenarios, we cannot * determine if temporary is initialized or not. */ -static boolean +static bool need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, unsigned index) { - if (!(emit->info.indirect_files && (1u << TGSI_FILE_TEMPORARY)) + if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) && emit->current_loop_depth == 0) { if (!emit->temp_map[index].initialized && emit->temp_map[index].index < emit->num_shader_temps) { - return TRUE; + return true; } } - return FALSE; + return false; } @@ -1474,19 +1561,19 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, { enum tgsi_file_type file = reg->Register.File; unsigned index = reg->Register.Index; - const boolean indirect = reg->Register.Indirect; + bool indirect = reg->Register.Indirect; unsigned tempArrayId = get_temp_array_id(emit, file, index); - boolean index2d = (reg->Register.Dimension || + bool index2d = (reg->Register.Dimension || tempArrayId > 0 || file == TGSI_FILE_CONSTANT); unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; - boolean indirect2d = reg->Dimension.Indirect; + bool indirect2d = reg->Dimension.Indirect; unsigned swizzleX = reg->Register.SwizzleX; unsigned swizzleY = reg->Register.SwizzleY; unsigned swizzleZ = reg->Register.SwizzleZ; unsigned swizzleW = reg->Register.SwizzleW; - const boolean absolute = reg->Register.Absolute; - const boolean negate = reg->Register.Negate; + const bool absolute = reg->Register.Absolute; + const bool negate = reg->Register.Negate; VGPU10OperandToken0 operand0; VGPU10OperandToken1 operand1; @@ -1528,11 +1615,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, /* Emitted as vCoverage0.x */ /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) * elements where s is the maximum number of color samples supported - * by the implementation. With current implementation, we should not - * have more than one element. So assert if Index != 0 + * by the implementation. */ - assert((!reg->Register.Indirect && reg->Register.Index == 0) || - reg->Register.Indirect); operand0.value = 0; operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; @@ -1655,7 +1739,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, index == emit->tcs.inner.tgsi_index || index == emit->tcs.outer.tgsi_index) { if (emit->tcs.control_point_phase) { - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { /* Device doesn't allow reading from output so @@ -1686,12 +1770,12 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, /* Device doesn't allow reading from output so * use corresponding temporary register as source */ file = TGSI_FILE_TEMPORARY; - index2d = FALSE; + index2d = false; index = emit->tcs.control_point_tmp_index + (index - emit->tcs.control_point_out_index); } else { - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } } } @@ -1705,6 +1789,14 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT; index = 0; + + /* Make sure swizzles are of those components allowed according + * to the tessellator domain. + */ + swizzleX = MIN2(swizzleX, emit->tes.swizzle_max); + swizzleY = MIN2(swizzleY, emit->tes.swizzle_max); + swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max); + swizzleW = MIN2(swizzleW, emit->tes.swizzle_max); } else if (index == emit->tes.inner.tgsi_index) { file = TGSI_FILE_TEMPORARY; @@ -1749,16 +1841,97 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + if (file == TGSI_FILE_SYSTEM_VALUE) { + if (index == emit->cs.thread_id_index) { + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP; + index = 0; + } else if (index == emit->cs.block_id_index) { + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID; + operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = swizzleX; + operand0.swizzleY = swizzleY; + operand0.swizzleZ = swizzleZ; + operand0.swizzleW = swizzleW; + emit_dword(emit, operand0.value); + return; + } else if (index == emit->cs.grid_size.tgsi_index) { + file = TGSI_FILE_IMMEDIATE; + index = emit->cs.grid_size.imm_index; + } + } + } if (file == TGSI_FILE_ADDRESS) { index = emit->address_reg_index[index]; file = TGSI_FILE_TEMPORARY; } + if (file == TGSI_FILE_CONSTANT) { + /** + * If this constant buffer is to be bound as srv raw buffer, + * then we have to load the constant to a temp first before + * it can be used as a source in the instruction. + * This is accomplished in two passes. The first pass is to + * identify if there is any constbuf to rawbuf translation. + * If there isn't, emit the instruction as usual. + * If there is, then we save the constant buffer reference info, + * and then instead of emitting the instruction at the end + * of the instruction, it will trigger a second pass of parsing + * this instruction. Before it starts the parsing, it will + * load the referenced raw buffer elements to temporaries. + * Then it will emit the instruction that replaces the + * constant buffer replaces with the corresponding temporaries. + */ + if (emit->raw_bufs & (1 << index2)) { + if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) { + unsigned tmpIdx = emit->raw_buf_cur_tmp_index; + + emit->raw_buf_tmp[tmpIdx].buffer_index = index2; + + /* Save whether the element index is indirect indexing */ + emit->raw_buf_tmp[tmpIdx].indirect = indirect; + + /* If it is indirect index, save the temporary + * address index, otherwise, save the immediate index. + */ + if (indirect) { + emit->raw_buf_tmp[tmpIdx].element_index = + emit->address_reg_index[reg->Indirect.Index]; + emit->raw_buf_tmp[tmpIdx].element_rel = + reg->Register.Index; + } + else { + emit->raw_buf_tmp[tmpIdx].element_index = index; + emit->raw_buf_tmp[tmpIdx].element_rel = 0; + } + + emit->raw_buf_cur_tmp_index++; + emit->reemit_rawbuf_instruction = REEMIT_TRUE; + emit->discard_instruction = true; + emit->reemit_tgsi_instruction = true; + } + else { + /* In the reemitting process, replace the constant buffer + * reference with temporary. + */ + file = TGSI_FILE_TEMPORARY; + index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index; + index2d = false; + indirect = false; + emit->raw_buf_cur_tmp_index++; + } + } + } + if (file == TGSI_FILE_TEMPORARY) { if (need_temp_reg_initialization(emit, index)) { emit->initialize_temp_index = index; - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } } @@ -1801,6 +1974,8 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, } } + check_register_index(emit, operand0.operandType, index); + /* Emit the operand tokens */ emit_dword(emit, operand0.value); if (operand0.extended) @@ -1834,6 +2009,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit, emit_dword(emit, remap_temp_index(emit, file, index)); if (indirect) { + assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP); emit_indirect_register(emit, reg->Indirect.Index); } } @@ -1873,9 +2049,15 @@ emit_resource_register(struct svga_shader_emitter_v10 *emit, */ static void emit_sampler_register(struct svga_shader_emitter_v10 *emit, - unsigned sampler_number) + unsigned unit) { VGPU10OperandToken0 operand0; + unsigned sampler_number; + + sampler_number = emit->key.tex[unit].sampler_index; + + if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping) + sampler_number++; check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); @@ -1971,7 +2153,7 @@ emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index) */ static void emit_opcode_precise(struct svga_shader_emitter_v10 *emit, - unsigned vgpu10_opcode, boolean saturate, boolean precise) + unsigned vgpu10_opcode, bool saturate, bool precise) { VGPU10OpcodeToken0 token0; @@ -1997,9 +2179,9 @@ emit_opcode_precise(struct svga_shader_emitter_v10 *emit, */ static void emit_opcode(struct svga_shader_emitter_v10 *emit, - unsigned vgpu10_opcode, boolean saturate) + unsigned vgpu10_opcode, bool saturate) { - emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE); + emit_opcode_precise(emit, vgpu10_opcode, saturate, false); } @@ -2031,7 +2213,7 @@ emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, */ static void emit_sample_opcode(struct svga_shader_emitter_v10 *emit, - unsigned vgpu10_opcode, boolean saturate, + unsigned vgpu10_opcode, bool saturate, const int offsets[3]) { VGPU10OpcodeToken0 token0; @@ -2071,7 +2253,7 @@ emit_sample_opcode(struct svga_shader_emitter_v10 *emit, * Otherwise, we'll discard the fragment if the X component is 0. */ static void -emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) +emit_discard_opcode(struct svga_shader_emitter_v10 *emit, bool nonzero) { VGPU10OpcodeToken0 opcode0; @@ -2129,7 +2311,7 @@ end_emit_instruction(struct svga_shader_emitter_v10 *emit) } emit->inst_start_token = 0; /* reset to zero for error checking */ - emit->discard_instruction = FALSE; + emit->discard_instruction = false; } @@ -2356,7 +2538,7 @@ writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) /** * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. */ -static boolean +static bool same_swizzle_terms(const struct tgsi_full_src_register *reg) { return (reg->Register.SwizzleX == reg->Register.SwizzleY && @@ -2391,7 +2573,7 @@ find_immediate(struct svga_shader_emitter_v10 *emit, const unsigned endIndex = emit->num_immediates; unsigned i; - assert(emit->immediates_emitted); + assert(emit->num_immediates_emitted > 0); /* Search immediates for x, y, z, w */ for (i = startIndex; i < endIndex; i++) { @@ -2402,8 +2584,7 @@ find_immediate(struct svga_shader_emitter_v10 *emit, return i; } } - /* Should never try to use an immediate value that wasn't pre-declared */ - assert(!"find_immediate() failed!"); + /* immediate not declared yet */ return -1; } @@ -2418,7 +2599,7 @@ find_immediate_dbl(struct svga_shader_emitter_v10 *emit, const unsigned endIndex = emit->num_immediates; unsigned i; - assert(emit->immediates_emitted); + assert(emit->num_immediates_emitted > 0); /* Search immediates for x, y, z, w */ for (i = 0; i < endIndex; i++) { @@ -2600,7 +2781,6 @@ alloc_immediate_4(struct svga_shader_emitter_v10 *emit, const union tgsi_immediate_data imm[4]) { unsigned n = emit->num_immediates++; - assert(!emit->immediates_emitted); assert(n < ARRAY_SIZE(emit->immediates)); emit->immediates[n][0] = imm[0]; emit->immediates[n][1] = imm[1]; @@ -2644,12 +2824,34 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, } +/** + * Add a new immediate after the immediate block has been declared. + * Any new immediates will be appended to the immediate block after the + * shader has been parsed. + * \return the index/position of the immediate. + */ +static unsigned +add_immediate_int(struct svga_shader_emitter_v10 *emit, int x) +{ + union tgsi_immediate_data imm[4]; + imm[0].Int = x; + imm[1].Int = x+1; + imm[2].Int = x+2; + imm[3].Int = x+3; + + unsigned immpos = alloc_immediate_4(emit, imm); + emit->num_new_immediates++; + + return immpos; +} + + static unsigned alloc_immediate_double2(struct svga_shader_emitter_v10 *emit, double x, double y) { unsigned n = emit->num_immediates++; - assert(!emit->immediates_emitted); + assert(!emit->num_immediates_emitted); assert(n < ARRAY_SIZE(emit->immediates)); emit->immediates_dbl[n][0] = x; emit->immediates_dbl[n][1] = y; @@ -2674,7 +2876,7 @@ alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) /** * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. */ -static boolean +static bool emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_immediate *imm) { @@ -2682,7 +2884,7 @@ emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, * immediate values and emit them later. */ alloc_immediate_4(emit, imm->u); - return TRUE; + return true; } @@ -2691,28 +2893,76 @@ emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, * containing all the immediate values previously allocated * with alloc_immediate_4(). */ -static boolean +static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) { VGPU10OpcodeToken0 token; - assert(!emit->immediates_emitted); + assert(!emit->num_immediates_emitted); token.value = 0; token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; + emit->immediates_block_start_token = + (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0); + /* Note: no begin/end_emit_instruction() calls */ emit_dword(emit, token.value); emit_dword(emit, 2 + 4 * emit->num_immediates); emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); - emit->immediates_emitted = TRUE; + emit->num_immediates_emitted = emit->num_immediates; + + emit->immediates_block_next_token = + (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0); + + return true; +} + + +/** + * Reemit the immediate constant buffer block to include the new + * immediates that are allocated after the block is declared. Those + * immediates are used as constant indices to constant buffers. + */ +static bool +reemit_immediates_block(struct svga_shader_emitter_v10 *emit) +{ + unsigned num_tokens = emit_get_num_tokens(emit); + unsigned num_new_immediates = emit->num_new_immediates; + + /* Reserve room for the new immediates */ + if (!reserve(emit, 4 * num_new_immediates)) + return false; + + /* Move the tokens after the immediates block to make room for the + * new immediates. + */ + VGPU10ProgramToken *tokens = (VGPU10ProgramToken *)emit->buf; + char *next = (char *) (tokens + emit->immediates_block_next_token); + char *new_next = (char *) (tokens + emit->immediates_block_next_token + + num_new_immediates * 4); + + char *end = emit->ptr; + unsigned len = end - next; + memmove(new_next, next, len); - return TRUE; + /* Append the new immediates to the end of the immediates block */ + char *start = (char *) (tokens + emit->immediates_block_start_token+1); + unsigned immediates_block_size = *(uint32 *)start; + + char *new_immediates = (char *)&emit->immediates[emit->num_immediates_emitted][0]; + *(uint32 *)start = immediates_block_size + 4 * num_new_immediates; + memcpy(next, new_immediates, 4 * num_new_immediates * sizeof(uint32)); + + emit->ptr = (char *) (tokens + num_tokens + 4 * num_new_immediates); + + return true; } + /** * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 * interpolation mode. @@ -2764,42 +3014,42 @@ translate_interpolation(const struct svga_shader_emitter_v10 *emit, * information. The output primitive topology might be changed later. The * final property instructions will be emitted as part of the pre-helper code. */ -static boolean +static bool emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_property *prop) { static const VGPU10_PRIMITIVE primType[] = { - VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ - VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ - VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ - VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ - VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ - VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ - VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ - VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ - VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ - VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ - VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ - VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ - VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ - VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_POINT, /* MESA_PRIM_POINTS */ + VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINES */ + VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUADS */ + VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_POLYGON */ + VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TRIANGLE_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */ }; static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { - VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ - VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ - VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ - VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ - VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ - VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ - VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ - VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ - VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ - VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ - VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ - VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ - VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ - VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* MESA_PRIM_POINTS */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINES */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINE_LOOP */ + VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* MESA_PRIM_LINE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* MESA_PRIM_TRIANGLES */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_FAN */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUADS */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */ + VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_POLYGON */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */ + VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */ }; static const unsigned inputArraySize[] = { @@ -2861,12 +3111,28 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, emit->tes.point_mode = prop->u[0].Data; break; + case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH: + emit->cs.block_width = prop->u[0].Data; + break; + + case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT: + emit->cs.block_height = prop->u[0].Data; + break; + + case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH: + emit->cs.block_depth = prop->u[0].Data; + break; + + case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL: + emit->fs.forceEarlyDepthStencil = true; + break; + default: debug_printf("Unexpected TGSI property %s\n", tgsi_property_names[prop->Property.PropertyName]); } - return TRUE; + return true; } @@ -2918,18 +3184,18 @@ emit_property_instructions(struct svga_shader_emitter_v10 *emit) */ static void emit_tessellator_domain(struct svga_shader_emitter_v10 *emit, - enum pipe_prim_type prim_mode) + enum mesa_prim prim_mode) { VGPU10OpcodeToken0 opcode0; opcode0.value = 0; opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN; switch (prim_mode) { - case PIPE_PRIM_QUADS: - case PIPE_PRIM_LINES: + case MESA_PRIM_QUADS: + case MESA_PRIM_LINES: opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD; break; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI; break; default: @@ -2964,6 +3230,12 @@ emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) end_emit_instruction(emit); emit_tessellator_domain(emit, emit->tes.prim_mode); + + /* Specify a max for swizzles of the domain point according to the + * tessellator domain type. + */ + emit->tes.swizzle_max = emit->tes.prim_mode == MESA_PRIM_TRIANGLES ? + TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y; } @@ -2986,7 +3258,10 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit) } emit->common_immediate_pos[n++] = - alloc_immediate_int4(emit, 0, 1, 0, -1); + alloc_immediate_int4(emit, 0, 1, 2, -1); + + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, 3, 4, 5, 6); if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { @@ -3034,7 +3309,8 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit) alloc_immediate_double2(emit, -1.0, -1.0); } - if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) { + if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 || + emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) { emit->common_immediate_pos[n++] = alloc_immediate_double2(emit, 0.0, 0.0); emit->common_immediate_pos[n++] = @@ -3059,6 +3335,25 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit) } } + /** TODO: allocate immediates for all possible element byte offset? + */ + if (emit->raw_bufs) { + unsigned i; + for (i = 7; i < 12; i+=4) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); + } + } + + if (emit->info.indirect_files & + (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) { + unsigned i; + for (i = 7; i < 8; i+=4) { + emit->common_immediate_pos[n++] = + alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3)); + } + } + assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); emit->num_common_immediates = n; } @@ -3102,12 +3397,12 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) if (emit->key.tcs.point_mode) { opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT; } - else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { + else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) { opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE; } else { - assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS || - emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES); + assert(emit->key.tcs.prim_mode == MESA_PRIM_QUADS || + emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES); if (emit->key.tcs.vertices_order_cw) opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW; @@ -3139,6 +3434,8 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) emit_dword(emit, opcode0.value); end_emit_instruction(emit); + alloc_common_immediates(emit); + /* Declare constant registers */ emit_constant_declaration(emit); @@ -3146,7 +3443,14 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) emit_sampler_declarations(emit); emit_resource_declarations(emit); - alloc_common_immediates(emit); + /* Declare images */ + emit_image_declarations(emit); + + /* Declare shader buffers */ + emit_shader_buf_declarations(emit); + + /* Declare atomic buffers */ + emit_atomic_buf_declarations(emit); int nVertices = emit->key.tcs.vertices_per_patch; emit->tcs.imm_index = @@ -3164,7 +3468,7 @@ emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) * A helper function to determine if control point phase is needed. * Returns TRUE if there is control point output. */ -static boolean +static bool needs_control_point_phase(struct svga_shader_emitter_v10 *emit) { unsigned i; @@ -3177,7 +3481,7 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit) */ if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) && emit->key.tcs.vertices_out) - return TRUE; + return true; for (i = 0; i < emit->info.num_outputs; i++) { switch (emit->info.output_semantic_name[i]) { @@ -3186,10 +3490,10 @@ needs_control_point_phase(struct svga_shader_emitter_v10 *emit) case TGSI_SEMANTIC_TESSINNER: break; default: - return TRUE; + return true; } } - return FALSE; + return false; } @@ -3248,7 +3552,7 @@ emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit) /** * Start the hull shader control point phase */ -static boolean +static bool emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) { /* If there is no control point output, skip the control point phase. */ @@ -3267,7 +3571,7 @@ emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) * add input signatures when we parse the tcs again in the * patch constant phase. */ - emit->tcs.fork_phase_add_signature = TRUE; + emit->tcs.fork_phase_add_signature = true; } else { /** @@ -3276,7 +3580,7 @@ emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) */ emit_passthrough_control_point_signature(emit); } - return FALSE; + return false; } /* Start the control point phase in the hull shader */ @@ -3295,7 +3599,7 @@ emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) VGPU10_NAME_UNDEFINED, VGPU10_OPERAND_0_COMPONENT, 0, 0, - VGPU10_INTERPOLATION_CONSTANT, TRUE, + VGPU10_INTERPOLATION_CONSTANT, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); if (emit->tcs.prim_id_index != INVALID_INDEX) { @@ -3307,11 +3611,11 @@ emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_0_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 0, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); } - return TRUE; + return true; } @@ -3320,15 +3624,15 @@ emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) * do the second pass of the tcs translation and emit * the relevant declarations and instructions for this phase. */ -static boolean +static bool emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, struct tgsi_parse_context *parse) { unsigned inst_number = 0; - boolean ret = TRUE; + bool ret = true; VGPU10OpcodeToken0 opcode0; - emit->skip_instruction = FALSE; + emit->skip_instruction = false; /* Start the patch constant phase */ opcode0.value = 0; @@ -3338,7 +3642,7 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, end_emit_instruction(emit); /* Set the current phase to patch constant phase */ - emit->tcs.control_point_phase = FALSE; + emit->tcs.control_point_phase = false; if (emit->tcs.prim_id_index != INVALID_INDEX) { emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, @@ -3349,13 +3653,13 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_0_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 0, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); } /* Emit declarations for this phase */ emit->index_range.required = - emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; + emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false; emit_tcs_input_declarations(emit); if (emit->index_range.start_index != INVALID_INDEX) { @@ -3363,13 +3667,13 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, } emit->index_range.required = - emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; + emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false; emit_tcs_output_declarations(emit); if (emit->index_range.start_index != INVALID_INDEX) { emit_index_range_declaration(emit); } - emit->index_range.required = FALSE; + emit->index_range.required = false; emit_temporaries_declaration(emit); @@ -3387,31 +3691,54 @@ emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, /* Usually this applies to TCS only. If shader is reading output of * patch constant in fork phase, we should reemit all instructions - * which are writting into ouput of patch constant in fork phase + * which are writting into output of patch constant in fork phase * to store results into temporaries. */ + assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction)); if (emit->reemit_instruction) { assert(emit->unit == PIPE_SHADER_TESS_CTRL); ret = emit_vgpu10_instruction(emit, inst_number, &parse->FullToken.FullInstruction); + } else if (emit->reemit_rawbuf_instruction) { + ret = emit_rawbuf_instruction(emit, inst_number, + &parse->FullToken.FullInstruction); } if (!ret) - return FALSE; + return false; } - return TRUE; + return true; +} + + +/** + * Emit the thread group declaration for compute shader. + */ +static void +emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit) +{ + VGPU10OpcodeToken0 opcode0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP; + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, emit->cs.block_width); + emit_dword(emit, emit->cs.block_height); + emit_dword(emit, emit->cs.block_depth); + end_emit_instruction(emit); } /** * Emit index range declaration. */ -static boolean +static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit) { if (emit->version < 50) - return TRUE; + return true; assert(emit->index_range.start_index != INVALID_INDEX); assert(emit->index_range.count != 0); @@ -3461,7 +3788,7 @@ emit_index_range_declaration(struct svga_shader_emitter_v10 *emit) emit->index_range.size = 0; emit->index_range.dim = 0; - return TRUE; + return true; } @@ -3542,7 +3869,7 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode, unsigned usageMask, VGPU10_INTERPOLATION_MODE interpMode, - boolean addSignature, + bool addSignature, SVGA3dDXSignatureSemanticName sgnName) { VGPU10OpcodeToken0 opcode0; @@ -3696,7 +4023,7 @@ emit_output_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPCODE_TYPE type, unsigned index, VGPU10_SYSTEM_NAME name, unsigned writemask, - boolean addSignature, + bool addSignature, SVGA3dDXSignatureSemanticName sgnName) { VGPU10OpcodeToken0 opcode0; @@ -3862,7 +4189,7 @@ emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPCODE_DCL_OUTPUT, semantic_index, VGPU10_NAME_UNDEFINED, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - TRUE, + true, map_tgsi_semantic_to_sgn_name(semantic_name)); if (semantic_index == 0) { @@ -3879,7 +4206,7 @@ emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPCODE_DCL_OUTPUT, idx, VGPU10_NAME_UNDEFINED, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - TRUE, + true, map_tgsi_semantic_to_sgn_name(semantic_name)); emit->info.output_semantic_index[idx] = j; } @@ -3910,7 +4237,7 @@ emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) static void emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, unsigned index, unsigned writemask, - boolean addSignature) + bool addSignature) { const enum tgsi_semantic semantic_name = emit->info.output_semantic_name[index]; @@ -3928,7 +4255,7 @@ emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, assert(emit->tcs.control_point_phase); type = VGPU10_OPCODE_DCL_OUTPUT; name = VGPU10_NAME_UNDEFINED; - emit_output_declaration(emit, type, index, name, final_mask, TRUE, + emit_output_declaration(emit, type, index, name, final_mask, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); return; } @@ -3973,7 +4300,7 @@ emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) { unsigned i; for (i = 0; i < emit->info.num_outputs; i++) { - emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); + emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true); } } @@ -3983,8 +4310,8 @@ emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) * for the specified stream. */ static unsigned -output_writemask_for_stream(unsigned stream, ubyte output_streams, - ubyte output_usagemask) +output_writemask_for_stream(unsigned stream, uint8_t output_streams, + uint8_t output_usagemask) { unsigned i; unsigned writemask = 0; @@ -4026,7 +4353,7 @@ emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) if (emit->version >= 50) { /* DCL_STREAM stream */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE); + emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, false); emit_stream_register(emit, s); end_emit_instruction(emit); } @@ -4057,7 +4384,7 @@ emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPCODE_DCL_OUTPUT_SGV, i, VGPU10_NAME_PRIMITIVE_ID, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - FALSE, + false, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_LAYER: @@ -4065,7 +4392,7 @@ emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPCODE_DCL_OUTPUT_SIV, i, VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX, VGPU10_OPERAND_4_COMPONENT_MASK_X, - FALSE, + false, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_VIEWPORT_INDEX: @@ -4073,12 +4400,12 @@ emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPCODE_DCL_OUTPUT_SIV, i, VGPU10_NAME_VIEWPORT_ARRAY_INDEX, VGPU10_OPERAND_4_COMPONENT_MASK_X, - FALSE, + false, map_tgsi_semantic_to_sgn_name(semantic_name)); emit->gs.viewport_index_out_index = i; break; default: - emit_vertex_output_declaration(emit, i, writemask, FALSE); + emit_vertex_output_declaration(emit, i, writemask, false); } } } @@ -4125,7 +4452,7 @@ emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, assert(emit->version >= 50); assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR || - (emit->key.tcs.prim_mode == PIPE_PRIM_LINES && + (emit->key.tcs.prim_mode == MESA_PRIM_LINES && name == VGPU10_NAME_UNDEFINED)); assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR); @@ -4188,7 +4515,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) emit->tcs.inner.out_index = outputIndex; switch (emit->key.tcs.prim_mode) { - case PIPE_PRIM_QUADS: + case MESA_PRIM_QUADS: emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, @@ -4199,13 +4526,13 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); break; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); break; - case PIPE_PRIM_LINES: + case MESA_PRIM_LINES: break; default: debug_printf("Unsupported primitive type"); @@ -4221,7 +4548,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) emit->tcs.outer.out_index = outputIndex; switch (emit->key.tcs.prim_mode) { - case PIPE_PRIM_QUADS: + case MESA_PRIM_QUADS: for (int j = 0; j < 4; j++) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, @@ -4229,7 +4556,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j); } break; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: for (int j = 0; j < 3; j++) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, @@ -4237,7 +4564,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j); } break; - case PIPE_PRIM_LINES: + case MESA_PRIM_LINES: for (int j = 0; j < 2; j++) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, @@ -4262,7 +4589,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index, VGPU10_NAME_UNDEFINED, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - FALSE, + false, map_tgsi_semantic_to_sgn_name(semantic_name)); SVGA3dDXShaderSignatureEntry *sgnEntry = @@ -4286,7 +4613,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) break; emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], - TRUE); + true); } } @@ -4298,10 +4625,10 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) if (emit->tcs.control_point_out_index == INVALID_INDEX) { /* use register index after tessellation factors */ switch (emit->key.tcs.prim_mode) { - case PIPE_PRIM_QUADS: + case MESA_PRIM_QUADS: emit->tcs.control_point_out_index = outputIndex + 6; break; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: emit->tcs.control_point_out_index = outputIndex + 4; break; default: @@ -4313,14 +4640,14 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) emit->tcs.control_point_out_index, VGPU10_NAME_POSITION, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - TRUE, + true, SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); /* If tcs does not output any control point output, * we can end the hull shader control point phase here * after emitting the default control point output. */ - emit->skip_instruction = TRUE; + emit->skip_instruction = true; } } else { @@ -4329,7 +4656,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) * we declare it here for patch constant phase only. */ emit->tcs.outer.out_index = outputIndex; - if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) { for (int i = 0; i < 4; i++) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, @@ -4337,7 +4664,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); } } - else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) { for (int i = 0; i < 3; i++) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, @@ -4352,7 +4679,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) * we declare it here */ emit->tcs.inner.out_index = outputIndex; - if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, @@ -4362,7 +4689,7 @@ emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); } - else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) { emit_tesslevel_declaration(emit, outputIndex++, VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, @@ -4383,7 +4710,7 @@ emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit) unsigned int i; for (i = 0; i < emit->info.num_outputs; i++) { - emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); + emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true); } } @@ -4406,7 +4733,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_VERTEXID: @@ -4420,7 +4747,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_SAMPLEID: @@ -4435,7 +4762,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_X, - VGPU10_INTERPOLATION_CONSTANT, TRUE, + VGPU10_INTERPOLATION_CONSTANT, true, map_tgsi_semantic_to_sgn_name(semantic_name)); break; case TGSI_SEMANTIC_SAMPLEPOS: @@ -4470,7 +4797,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_0_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 0, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { /* The emission of the control point id will be done @@ -4495,7 +4822,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_1_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 0, - VGPU10_INTERPOLATION_CONSTANT, TRUE, + VGPU10_INTERPOLATION_CONSTANT, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); break; case TGSI_SEMANTIC_TESSCOORD: @@ -4503,11 +4830,11 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, unsigned usageMask = 0; - if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { + if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) { usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ; } - else if (emit->tes.prim_mode == PIPE_PRIM_LINES || - emit->tes.prim_mode == PIPE_PRIM_QUADS) { + else if (emit->tes.prim_mode == MESA_PRIM_LINES || + emit->tes.prim_mode == MESA_PRIM_QUADS) { usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY; } @@ -4520,7 +4847,7 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, usageMask, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); break; case TGSI_SEMANTIC_TESSINNER: @@ -4553,10 +4880,45 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, VGPU10_OPERAND_0_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 0, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, map_tgsi_semantic_to_sgn_name(semantic_name)); } break; + case TGSI_SEMANTIC_THREAD_ID: + assert(emit->unit >= PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.thread_id_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, true, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_BLOCK_ID: + assert(emit->unit >= PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.block_id_index = index; + emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, + VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID, + VGPU10_OPERAND_INDEX_0D, + index, 1, + VGPU10_NAME_UNDEFINED, + VGPU10_OPERAND_4_COMPONENT, + VGPU10_OPERAND_4_COMPONENT_MASK_MODE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + VGPU10_INTERPOLATION_UNDEFINED, true, + map_tgsi_semantic_to_sgn_name(semantic_name)); + break; + case TGSI_SEMANTIC_GRID_SIZE: + assert(emit->unit == PIPE_SHADER_COMPUTE); + assert(emit->version >= 50); + emit->cs.grid_size.tgsi_index = index; + break; default: debug_printf("unexpected system value semantic index %u / %s\n", semantic_name, tgsi_semantic_names[semantic_name]); @@ -4566,19 +4928,19 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, /** * Translate a TGSI declaration to VGPU10. */ -static boolean +static bool emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_declaration *decl) { switch (decl->Declaration.File) { case TGSI_FILE_INPUT: /* do nothing - see emit_input_declarations() */ - return TRUE; + return true; case TGSI_FILE_OUTPUT: assert(decl->Range.First == decl->Range.Last); emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; - return TRUE; + return true; case TGSI_FILE_TEMPORARY: /* Don't declare the temps here. Just keep track of how many @@ -4600,7 +4962,7 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, /* for all temps, indexed or not, keep track of highest index */ emit->num_shader_temps = MAX2(emit->num_shader_temps, decl->Range.Last + 1); - return TRUE; + return true; case TGSI_FILE_CONSTANT: /* Don't declare constants here. Just keep track and emit later. */ @@ -4623,39 +4985,40 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, " but [%u] is the limit.\n", num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + emit->register_overflow = true; } /* The linker doesn't enforce the max UBO size so we clamp here */ emit->num_shader_consts[constbuf] = MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); } - return TRUE; + return true; case TGSI_FILE_IMMEDIATE: assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); - return FALSE; + return false; case TGSI_FILE_SYSTEM_VALUE: emit_system_value_declaration(emit, decl->Semantic.Name, decl->Range.First); - return TRUE; + return true; case TGSI_FILE_SAMPLER: /* Don't declare samplers here. Just keep track and emit later. */ emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); - return TRUE; + return true; #if 0 case TGSI_FILE_RESOURCE: /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ assert(!"TGSI_FILE_RESOURCE not handled yet"); - return FALSE; + return false; #endif case TGSI_FILE_ADDRESS: emit->num_address_regs = MAX2(emit->num_address_regs, decl->Range.Last + 1); - return TRUE; + return true; case TGSI_FILE_SAMPLER_VIEW: { @@ -4665,18 +5028,54 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, /* Note: we can ignore YZW return types for now */ emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; - emit->sampler_view[unit] = TRUE; + emit->sampler_view[unit] = true; } - return TRUE; + return true; + + case TGSI_FILE_IMAGE: + { + unsigned unit = decl->Range.First; + assert(decl->Range.First == decl->Range.Last); + assert(unit < PIPE_MAX_SHADER_IMAGES); + emit->image[unit] = decl->Image; + emit->image_mask |= 1 << unit; + emit->num_images++; + } + return true; + + case TGSI_FILE_HW_ATOMIC: + /* Declare the atomic buffer if it is not already declared. */ + if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) { + emit->num_atomic_bufs++; + emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D); + } + + /* Remember the maximum atomic counter index encountered */ + emit->max_atomic_counter_index = + MAX2(emit->max_atomic_counter_index, decl->Range.Last); + return true; + + case TGSI_FILE_MEMORY: + /* Record memory has been used. */ + if (emit->unit == PIPE_SHADER_COMPUTE && + decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) { + emit->cs.shared_memory_declared = true; + } + + return true; + + case TGSI_FILE_BUFFER: + assert(emit->version >= 50); + emit->num_shader_bufs++; + return true; default: assert(!"Unexpected type of declaration"); - return FALSE; + return false; } } - /** * Emit input declarations for fragment shader. */ @@ -4769,7 +5168,7 @@ emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, mask, - interpolationMode, TRUE, + interpolationMode, true, map_tgsi_semantic_to_sgn_name(semantic_name)); } } @@ -4797,7 +5196,7 @@ emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT, VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); } } @@ -4859,7 +5258,7 @@ emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit) name, numComp, selMode, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, + VGPU10_INTERPOLATION_UNDEFINED, true, map_tgsi_semantic_to_sgn_name(semantic_name)); } } @@ -4873,8 +5272,7 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) { unsigned i; unsigned size = emit->key.tcs.vertices_per_patch; - unsigned indicesMask = 0; - boolean addSignature = TRUE; + bool addSignature = true; if (!emit->tcs.control_point_phase) addSignature = emit->tcs.fork_phase_add_signature; @@ -4888,9 +5286,6 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) SVGA3dDXSignatureSemanticName sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name); - /* indices that are declared */ - indicesMask |= 1 << index; - if (semantic_name == TGSI_SEMANTIC_POSITION || index == emit->linkage.position_index) { /* save the input control point index for later use */ @@ -4928,27 +5323,6 @@ emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) } if (emit->tcs.control_point_phase) { - if (emit->tcs.control_point_input_index == INVALID_INDEX) { - - /* Add input control point declaration if it does not exist */ - if ((indicesMask & (1 << emit->linkage.position_index)) == 0) { - emit->linkage.input_map[emit->linkage.num_inputs] = - emit->linkage.position_index; - emit->tcs.control_point_input_index = emit->linkage.num_inputs++; - - emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, - VGPU10_OPERAND_TYPE_INPUT, - VGPU10_OPERAND_INDEX_2D, - emit->linkage.position_index, - emit->key.tcs.vertices_per_patch, - VGPU10_NAME_UNDEFINED, - VGPU10_OPERAND_4_COMPONENT, - VGPU10_OPERAND_4_COMPONENT_MASK_MODE, - VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - VGPU10_INTERPOLATION_UNDEFINED, TRUE, - SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); - } - } /* Also add an address register for the indirection to the * input control points @@ -4968,7 +5342,7 @@ emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) */ unsigned inputIndex = emit->key.tes.tessfactor_index; - if (emit->tes.prim_mode == PIPE_PRIM_QUADS) { + if (emit->tes.prim_mode == MESA_PRIM_QUADS) { if (emit->key.tes.need_tessouter) { emit->tes.outer.in_index = inputIndex; for (int i = 0; i < 4; i++) { @@ -4995,7 +5369,7 @@ emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); } } - else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { + else if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) { if (emit->key.tes.need_tessouter) { emit->tes.outer.in_index = inputIndex; for (int i = 0; i < 3; i++) { @@ -5016,7 +5390,7 @@ emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); } } - else if (emit->tes.prim_mode == PIPE_PRIM_LINES) { + else if (emit->tes.prim_mode == MESA_PRIM_LINES) { if (emit->key.tes.need_tessouter) { emit->tes.outer.in_index = inputIndex; emit_tesslevel_declaration(emit, inputIndex++, @@ -5075,7 +5449,7 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, VGPU10_INTERPOLATION_UNDEFINED, - TRUE, sgn_name); + true, sgn_name); } emit_tessfactor_input_declarations(emit); @@ -5104,7 +5478,7 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, VGPU10_INTERPOLATION_UNDEFINED, - TRUE, + true, map_tgsi_semantic_to_sgn_name(sem_name)); } else if (sem_name != TGSI_SEMANTIC_TESSINNER && @@ -5118,7 +5492,7 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) VGPU10_OPERAND_4_COMPONENT_MASK_MODE, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, VGPU10_INTERPOLATION_UNDEFINED, - TRUE, + true, map_tgsi_semantic_to_sgn_name(sem_name)); } /* tessellation factors are taken care of in @@ -5133,11 +5507,11 @@ emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) /** * Emit all input declarations. */ -static boolean +static bool emit_input_declarations(struct svga_shader_emitter_v10 *emit) { emit->index_range.required = - emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; + emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false; switch (emit->unit) { case PIPE_SHADER_FRAGMENT: @@ -5165,19 +5539,19 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit) if (emit->index_range.start_index != INVALID_INDEX) { emit_index_range_declaration(emit); } - emit->index_range.required = FALSE; - return TRUE; + emit->index_range.required = false; + return true; } /** * Emit all output declarations. */ -static boolean +static bool emit_output_declarations(struct svga_shader_emitter_v10 *emit) { emit->index_range.required = - emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; + emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false; switch (emit->unit) { case PIPE_SHADER_FRAGMENT: @@ -5214,7 +5588,7 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) emit->vposition.so_index, VGPU10_NAME_UNDEFINED, VGPU10_OPERAND_4_COMPONENT_MASK_ALL, - TRUE, + true, SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); } @@ -5225,13 +5599,14 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) /* Emit the declaration for the clip distance shadow copy which * will be used for stream output purpose and for clip distance - * varying variable + * varying variable. Note all clip distances + * will be written regardless of the enabled clipping planes. */ emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, emit->clip_dist_so_index, VGPU10_NAME_UNDEFINED, - emit->output_usage_mask[emit->clip_dist_out_index], - TRUE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); if (emit->info.num_written_clipdistance > 4) { @@ -5239,8 +5614,8 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, emit->clip_dist_so_index + 1, VGPU10_NAME_UNDEFINED, - emit->output_usage_mask[emit->clip_dist_out_index+1], - TRUE, + VGPU10_OPERAND_4_COMPONENT_MASK_ALL, + true, SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); } } @@ -5248,8 +5623,8 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit) if (emit->index_range.start_index != INVALID_INDEX) { emit_index_range_declaration(emit); } - emit->index_range.required = FALSE; - return TRUE; + emit->index_range.required = false; + return true; } @@ -5282,7 +5657,7 @@ create_temp_array(struct svga_shader_emitter_v10 *emit, /** * Emit the declaration for the temporary registers. */ -static boolean +static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) { unsigned total_temps, reg, i; @@ -5447,6 +5822,17 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) emit->tcs.invocation_id_tmp_index = total_temps++; } + if (emit->raw_bufs) { + /** + * Add 3 more temporaries if we need to translate constant buffer + * to srv raw buffer. Since we need to load the value to a temporary + * before it can be used as a source. There could be three source + * register in an instruction. + */ + emit->raw_buf_tmp_index = total_temps; + total_temps+=3; + } + for (i = 0; i < emit->num_address_regs; i++) { emit->address_reg_index[i] = total_temps++; } @@ -5539,11 +5925,38 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) */ check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); - return TRUE; + return true; } -static boolean +static bool +emit_rawbuf_declaration(struct svga_shader_emitter_v10 *emit, + unsigned index) +{ + VGPU10OpcodeToken0 opcode1; + VGPU10OperandToken0 operand1; + + opcode1.value = 0; + opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW; + opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN; + + operand1.value = 0; + operand1.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE; + operand1.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode1.value); + emit_dword(emit, operand1.value); + emit_dword(emit, index); + end_emit_instruction(emit); + + return true; +} + + +static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit) { VGPU10OpcodeToken0 opcode0; @@ -5618,8 +6031,20 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) } } } + if (emit->key.image_size_used) { + emit->image_size_index = total_consts; + total_consts += emit->num_images; + } if (total_consts > 0) { + if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { + debug_printf("Warning: Too many constants [%u] declared in constant" + " buffer 0. %u is the limit.\n", + total_consts, + VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); + total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT; + emit->register_overflow = true; + } begin_emit_instruction(emit); emit_dword(emit, opcode0.value); emit_dword(emit, operand0.value); @@ -5629,30 +6054,40 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) } /* Declare remaining constant buffers (UBOs) */ + for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { if (emit->num_shader_consts[i] > 0) { - begin_emit_instruction(emit); - emit_dword(emit, opcode0.value); - emit_dword(emit, operand0.value); - emit_dword(emit, i); /* which const buffer slot */ - emit_dword(emit, emit->num_shader_consts[i]); - end_emit_instruction(emit); + if (emit->raw_bufs & (1 << i)) { + /* UBO declared as srv raw buffer */ + emit_rawbuf_declaration(emit, i + emit->raw_buf_srv_start_index); + } + else { + + /* UBO declared as const buffer */ + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, i); /* which const buffer slot */ + emit_dword(emit, emit->num_shader_consts[i]); + end_emit_instruction(emit); + } } } - return TRUE; + return true; } /** * Emit declarations for samplers. */ -static boolean +static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) { unsigned i; - for (i = 0; i < emit->num_samplers; i++) { + for (i = 0; i < emit->key.num_samplers; i++) { + VGPU10OpcodeToken0 opcode0; VGPU10OperandToken0 operand0; @@ -5673,7 +6108,7 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) end_emit_instruction(emit); } - return TRUE; + return true; } @@ -5683,7 +6118,8 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) static unsigned pipe_texture_to_resource_dimension(enum tgsi_texture_type target, unsigned num_samples, - boolean is_array) + bool is_array, + bool is_uav) { switch (target) { case PIPE_BUFFER: @@ -5710,8 +6146,9 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target, else return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; case PIPE_TEXTURE_CUBE_ARRAY: - return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : - VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); default: assert(!"Unexpected resource type"); return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; @@ -5725,7 +6162,8 @@ pipe_texture_to_resource_dimension(enum tgsi_texture_type target, static unsigned tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, unsigned num_samples, - boolean is_array) + bool is_array, + bool is_uav) { if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { target = TGSI_TEXTURE_2D; @@ -5746,7 +6184,8 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: - return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; case TGSI_TEXTURE_SHADOW1D: return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; case TGSI_TEXTURE_SHADOW2D: @@ -5766,6 +6205,9 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; case TGSI_TEXTURE_CUBE_ARRAY: + return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY : + (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : + VGPU10_RESOURCE_DIMENSION_TEXTURECUBE); case TGSI_TEXTURE_SHADOWCUBE_ARRAY: return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; @@ -5779,21 +6221,21 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, /** * Given a tgsi_return_type, return true iff it is an integer type. */ -static boolean +static bool is_integer_type(enum tgsi_return_type type) { switch (type) { case TGSI_RETURN_TYPE_SINT: case TGSI_RETURN_TYPE_UINT: - return TRUE; + return true; case TGSI_RETURN_TYPE_FLOAT: case TGSI_RETURN_TYPE_UNORM: case TGSI_RETURN_TYPE_SNORM: - return FALSE; + return false; case TGSI_RETURN_TYPE_COUNT: default: assert(!"is_integer_type: Unknown tgsi_return_type"); - return FALSE; + return false; } } @@ -5804,13 +6246,16 @@ is_integer_type(enum tgsi_return_type type) * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may * rework this code. */ -static boolean +static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit) { unsigned i; /* Emit resource decl for each sampler */ for (i = 0; i < emit->num_samplers; i++) { + if (!(emit->info.samplers_declared & (1 << i))) + continue; + VGPU10OpcodeToken0 opcode0; VGPU10OperandToken0 operand0; VGPU10ResourceReturnTypeToken return_type; @@ -5822,13 +6267,15 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) opcode0.resourceDimension = tgsi_texture_to_resource_dimension(emit->sampler_target[i], emit->key.tex[i].num_samples, - emit->key.tex[i].is_array); + emit->key.tex[i].is_array, + false); } else { opcode0.resourceDimension = pipe_texture_to_resource_dimension(emit->key.tex[i].target, emit->key.tex[i].num_samples, - emit->key.tex[i].is_array); + emit->key.tex[i].is_array, + false); } opcode0.sampleCount = emit->key.tex[i].num_samples; operand0.value = 0; @@ -5879,9 +6326,246 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) end_emit_instruction(emit); } - return TRUE; + return true; +} + + +/** + * Emit instruction to declare uav for the shader image + */ +static void +emit_image_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i = 0; + unsigned unit = 0; + unsigned uav_mask = 0; + + /* Emit uav decl for each image */ + for (i = 0; i < emit->num_images; i++, unit++) { + + /* Find the unit index of the next declared image. + */ + while (!(emit->image_mask & (1 << unit))) { + unit++; + } + + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + VGPU10ResourceReturnTypeToken return_type; + + /* If the corresponding uav for the image is already declared, + * skip this image declaration. + */ + if (uav_mask & (1 << emit->key.images[unit].uav_index)) + continue; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED; + opcode0.uavResourceDimension = + tgsi_texture_to_resource_dimension(emit->image[unit].Resource, + 0, emit->key.images[unit].is_array, + true); + + if (emit->key.images[unit].is_single_layer && + emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) { + opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D; + } + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + return_type.value = 0; + return_type.component0 = + return_type.component1 = + return_type.component2 = + return_type.component3 = emit->key.images[unit].return_type + 1; + + assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID); + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, emit->key.images[unit].uav_index); + emit_dword(emit, return_type.value); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << emit->key.images[unit].uav_index; + } + + emit->uav_declared |= uav_mask; } + +/** + * Emit instruction to declare uav for the shader buffer + */ +static void +emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned i; + unsigned uav_mask = 0; + + /* Emit uav decl for each shader buffer */ + for (i = 0; i < emit->num_shader_bufs; i++) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + if (emit->raw_shaderbufs & (1 << i)) { + emit_rawbuf_declaration(emit, i + emit->raw_shaderbuf_srv_start_index); + continue; + } + + /* If the corresponding uav for the shader buf is already declared, + * skip this shader buffer declaration. + */ + if (uav_mask & (1 << emit->key.shader_buf_uav_index[i])) + continue; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID); + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, emit->key.shader_buf_uav_index[i]); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << emit->key.shader_buf_uav_index[i]; + } + + emit->uav_declared |= uav_mask; +} + + +/** + * Emit instruction to declare thread group shared memory(tgsm) for shared memory + */ +static void +emit_memory_declarations(struct svga_shader_emitter_v10 *emit) +{ + if (emit->cs.shared_memory_declared) { + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + + /* Current state tracker only declares one shared memory for GLSL. + * Use index 0 for this shared memory. + */ + emit_dword(emit, 0); + emit_dword(emit, emit->key.cs.mem_size); /* byte Count */ + end_emit_instruction(emit); + } +} + + +/** + * Emit instruction to declare uav for atomic buffers + */ +static void +emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit) +{ + unsigned atomic_bufs_mask = emit->atomic_bufs_mask; + unsigned uav_mask = 0; + + /* Emit uav decl for each atomic buffer */ + while (atomic_bufs_mask) { + unsigned buf_index = u_bit_scan(&atomic_bufs_mask); + unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index]; + + /* If the corresponding uav for the shader buf is already declared, + * skip this shader buffer declaration. + */ + if (uav_mask & (1 << uav_index)) + continue; + + VGPU10OpcodeToken0 opcode0; + VGPU10OperandToken0 operand0; + + assert(uav_index != SVGA3D_INVALID_ID); + + opcode0.value = 0; + opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW; + opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER; + + /* Declare the uav as global coherent if the shader includes memory + * barrier instructions. + */ + opcode0.globallyCoherent = + (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0; + opcode0.uavHasCounter = 1; + + operand0.value = 0; + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; + + begin_emit_instruction(emit); + emit_dword(emit, opcode0.value); + emit_dword(emit, operand0.value); + emit_dword(emit, uav_index); + end_emit_instruction(emit); + + /* Mark the uav is already declared */ + uav_mask |= 1 << uav_index; + } + + emit->uav_declared |= uav_mask; + + /* Allocate immediates to be used for index to the atomic buffers */ + unsigned j = 0; + for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) { + alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); + } + + /* Allocate immediates for the atomic counter index */ + for (; j <= emit->max_atomic_counter_index; j+=4) { + alloc_immediate_int4(emit, j+0, j+1, j+2, j+3); + } +} + + /** * Emit instruction with n=1, 2 or 3 source registers. */ @@ -5892,7 +6576,7 @@ emit_instruction_opn(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_src_register *src1, const struct tgsi_full_src_register *src2, const struct tgsi_full_src_register *src3, - boolean saturate, bool precise) + bool saturate, bool precise) { begin_emit_instruction(emit); emit_opcode_precise(emit, opcode, saturate, precise); @@ -5913,7 +6597,7 @@ emit_instruction_op1(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src) { - emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE); + emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, false, false); } static void @@ -5923,7 +6607,7 @@ emit_instruction_op2(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_src_register *src1, const struct tgsi_full_src_register *src2) { - emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE); + emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, false, false); } static void @@ -5934,7 +6618,7 @@ emit_instruction_op3(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_src_register *src2, const struct tgsi_full_src_register *src3) { - emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE); + emit_instruction_opn(emit, opcode, dst, src1, src2, src3, false, false); } static void @@ -5942,7 +6626,7 @@ emit_instruction_op0(struct svga_shader_emitter_v10 *emit, VGPU10_OPCODE_TYPE opcode) { begin_emit_instruction(emit); - emit_opcode(emit, opcode, FALSE); + emit_opcode(emit, opcode, false); end_emit_instruction(emit); } @@ -5959,7 +6643,7 @@ store_tesslevels(struct svga_shader_emitter_v10 *emit) * emitting the inner/outer tessellation levels, either from * values provided in tcs or fallback default values which is 1.0 */ - if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { + if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) { struct tgsi_full_src_register temp_src; if (emit->tcs.inner.tgsi_index != INVALID_INDEX) @@ -5990,7 +6674,7 @@ store_tesslevels(struct svga_shader_emitter_v10 *emit) emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); } } - else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { + else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) { struct tgsi_full_src_register temp_src; if (emit->tcs.inner.tgsi_index != INVALID_INDEX) @@ -6019,7 +6703,7 @@ store_tesslevels(struct svga_shader_emitter_v10 *emit) emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); } } - else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { + else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) { if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { struct tgsi_full_src_register temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); @@ -6144,14 +6828,14 @@ emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) if (plane_mask & 0xf) { unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, - VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, + VGPU10_NAME_CLIP_DISTANCE, cmask, true, SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); emit->num_outputs++; } if (plane_mask & 0xf0) { unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, - VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, + VGPU10_NAME_CLIP_DISTANCE, cmask, true, SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); emit->num_outputs++; } @@ -6275,7 +6959,7 @@ emit_swap_r_b(struct svga_shader_emitter_v10 *emit, swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_opcode(emit, VGPU10_OPCODE_MOV, false); emit_dst_register(emit, dst); emit_src_register(emit, &bgra_src); end_emit_instruction(emit); @@ -6371,7 +7055,7 @@ emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, /** * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. */ -static boolean +static bool emit_arl_uarl(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6398,14 +7082,14 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit, emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_CAL instruction. */ -static boolean +static bool emit_cal(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6419,14 +7103,14 @@ emit_cal(struct svga_shader_emitter_v10 *emit, emit_dword(emit, label); end_emit_instruction(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_IABS instruction. */ -static boolean +static bool emit_iabs(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6442,14 +7126,14 @@ emit_iabs(struct svga_shader_emitter_v10 *emit, emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], &inst->Src[0], &neg_src); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_CMP instruction. */ -static boolean +static bool emit_cmp(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6468,22 +7152,22 @@ emit_cmp(struct svga_shader_emitter_v10 *emit, struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst, - &inst->Src[0], &zero, NULL, FALSE, + &inst->Src[0], &zero, NULL, false, inst->Instruction.Precise); emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, &inst->Src[1], &inst->Src[2], - inst->Instruction.Saturate, FALSE); + inst->Instruction.Saturate, false); free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_DST instruction. */ -static boolean +static bool emit_dst(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6555,7 +7239,7 @@ emit_dst(struct svga_shader_emitter_v10 *emit, emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); free_temp_indexes(emit); - return TRUE; + return true; } @@ -6574,7 +7258,7 @@ find_stream_index(struct svga_shader_emitter_v10 *emit, /** * Emit code for TGSI_OPCODE_ENDPRIM (GS only) */ -static boolean +static bool emit_endprim(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6588,26 +7272,26 @@ emit_endprim(struct svga_shader_emitter_v10 *emit, /** * If there is no output for this stream, discard this instruction. */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { - emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE); + emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, false); assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); emit_stream_register(emit, streamIndex); } } else { - emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); + emit_opcode(emit, VGPU10_OPCODE_CUT, false); } end_emit_instruction(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. */ -static boolean +static bool emit_ex2(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6628,14 +7312,14 @@ emit_ex2(struct svga_shader_emitter_v10 *emit, inst->Instruction.Saturate, inst->Instruction.Precise); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_EXP instruction. */ -static boolean +static bool emit_exp(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6715,14 +7399,14 @@ emit_exp(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_IF instruction. */ -static boolean +static bool emit_if(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_src_register *src) { @@ -6746,17 +7430,17 @@ emit_if(struct svga_shader_emitter_v10 *emit, emit_src_register(emit, src); end_emit_instruction(emit); - return TRUE; + return true; } /** - * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of + * Emit code for conditional discard instruction (discard fragment if any of * the register components are negative). */ -static boolean -emit_kill_if(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static bool +emit_cond_discard(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); @@ -6774,8 +7458,9 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, if (!same_swizzle_terms(&inst->Src[0])) { /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to - * logically OR the swizzle terms. Most uses of KILL_IF only - * test one channel so it's good to avoid these extra steps. + * logically OR the swizzle terms. Most uses of this conditional + * discard instruction only test one channel so it's good to + * avoid these extra steps. */ struct tgsi_full_src_register tmp_src_yyyy = scalar_src(&tmp_src, TGSI_SWIZZLE_Y); @@ -6793,39 +7478,39 @@ emit_kill_if(struct svga_shader_emitter_v10 *emit, } begin_emit_instruction(emit); - emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ + emit_discard_opcode(emit, true); /* discard if src0.x is non-zero */ emit_src_register(emit, &tmp_src_xxxx); end_emit_instruction(emit); free_temp_indexes(emit); - return TRUE; + return true; } /** - * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). + * Emit code for the unconditional discard instruction. */ -static boolean -emit_kill(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) +static bool +emit_discard(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); /* DISCARD if 0.0 is zero */ begin_emit_instruction(emit); - emit_discard_opcode(emit, FALSE); + emit_discard_opcode(emit, false); emit_src_register(emit, &zero); end_emit_instruction(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_LG2 instruction. */ -static boolean +static bool emit_lg2(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6846,14 +7531,14 @@ emit_lg2(struct svga_shader_emitter_v10 *emit, inst->Instruction.Saturate, inst->Instruction.Precise); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_LIT instruction. */ -static boolean +static bool emit_lit(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6899,7 +7584,7 @@ emit_lit(struct svga_shader_emitter_v10 *emit, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, - &zero, NULL, inst->Instruction.Saturate, FALSE); + &zero, NULL, inst->Instruction.Saturate, false); } /* @@ -6982,14 +7667,14 @@ emit_lit(struct svga_shader_emitter_v10 *emit, emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit Level Of Detail Query (LODQ) instruction. */ -static boolean +static bool emit_lodq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -6999,21 +7684,21 @@ emit_lodq(struct svga_shader_emitter_v10 *emit, /* LOD dst, coord, resource, sampler */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); + emit_opcode(emit, VGPU10_OPCODE_LOD, false); emit_dst_register(emit, &inst->Dst[0]); emit_src_register(emit, &inst->Src[0]); /* coord */ emit_resource_register(emit, unit); emit_sampler_register(emit, unit); end_emit_instruction(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_LOG instruction. */ -static boolean +static bool emit_log(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7047,7 +7732,7 @@ emit_log(struct svga_shader_emitter_v10 *emit, emit_instruction_opn(emit, VGPU10_OPCODE_MOV, &dst_z, &tmp_src, NULL, NULL, - inst->Instruction.Saturate, FALSE); + inst->Instruction.Saturate, false); } /* FLR tmp.x, tmp.x */ @@ -7062,7 +7747,7 @@ emit_log(struct svga_shader_emitter_v10 *emit, emit_instruction_opn(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, NULL, NULL, - inst->Instruction.Saturate, FALSE); + inst->Instruction.Saturate, false); } /* EXP tmp.x, tmp.x */ @@ -7073,7 +7758,7 @@ emit_log(struct svga_shader_emitter_v10 *emit, emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src); emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, - &tmp_src, NULL, inst->Instruction.Saturate, FALSE); + &tmp_src, NULL, inst->Instruction.Saturate, false); } /* MOV dst.w, 1.0 */ @@ -7088,14 +7773,14 @@ emit_log(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_LRP instruction. */ -static boolean +static bool emit_lrp(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7112,7 +7797,7 @@ emit_lrp(struct svga_shader_emitter_v10 *emit, /* ADD tmp, s1, -s2 */ emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp, - &inst->Src[1], &neg_src2, NULL, FALSE, + &inst->Src[1], &neg_src2, NULL, false, inst->Instruction.Precise); /* MAD dst, s1, tmp, s3 */ @@ -7123,14 +7808,14 @@ emit_lrp(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_POW instruction. */ -static boolean +static bool emit_pow(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7153,12 +7838,12 @@ emit_pow(struct svga_shader_emitter_v10 *emit, /* LOG tmp, s0.xxxx */ emit_instruction_opn(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, NULL, NULL, - FALSE, inst->Instruction.Precise); + false, inst->Instruction.Precise); /* MUL tmp, tmp, s1.xxxx */ emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, &src1_xxxx, NULL, - FALSE, inst->Instruction.Precise); + false, inst->Instruction.Precise); /* EXP tmp, s0.xxxx */ emit_instruction_opn(emit, VGPU10_OPCODE_EXP, @@ -7169,14 +7854,14 @@ emit_pow(struct svga_shader_emitter_v10 *emit, /* free tmp */ free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. */ -static boolean +static bool emit_rcp(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7213,7 +7898,7 @@ emit_rcp(struct svga_shader_emitter_v10 *emit, /* DIV tmp.x, 1.0, s0 */ emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, &inst->Src[0], NULL, - FALSE, inst->Instruction.Precise); + false, inst->Instruction.Precise); /* MOV dst, tmp.xxxx */ emit_instruction_opn(emit, VGPU10_OPCODE_MOV, @@ -7224,14 +7909,14 @@ emit_rcp(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); } - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_RSQ instruction. */ -static boolean +static bool emit_rsq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7254,7 +7939,7 @@ emit_rsq(struct svga_shader_emitter_v10 *emit, /* RSQ tmp, src.x */ emit_instruction_opn(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, &inst->Src[0], NULL, NULL, - FALSE, inst->Instruction.Precise); + false, inst->Instruction.Precise); /* MOV dst, tmp.xxxx */ emit_instruction_opn(emit, VGPU10_OPCODE_MOV, @@ -7265,14 +7950,14 @@ emit_rsq(struct svga_shader_emitter_v10 *emit, /* free tmp */ free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. */ -static boolean +static bool emit_seq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7298,14 +7983,14 @@ emit_seq(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. */ -static boolean +static bool emit_sge(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7331,14 +8016,14 @@ emit_sge(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. */ -static boolean +static bool emit_sgt(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7364,14 +8049,14 @@ emit_sgt(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. */ -static boolean +static bool emit_sincos(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7385,7 +8070,7 @@ emit_sincos(struct svga_shader_emitter_v10 *emit, writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); + emit_opcode(emit, VGPU10_OPCODE_SINCOS, false); if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) { @@ -7407,14 +8092,14 @@ emit_sincos(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. */ -static boolean +static bool emit_sle(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7440,14 +8125,14 @@ emit_sle(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. */ -static boolean +static bool emit_slt(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7473,14 +8158,14 @@ emit_slt(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. */ -static boolean +static bool emit_sne(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7506,14 +8191,14 @@ emit_sne(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. */ -static boolean +static bool emit_ssg(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7553,14 +8238,14 @@ emit_ssg(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. */ -static boolean +static bool emit_issg(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7594,7 +8279,7 @@ emit_issg(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } @@ -7611,7 +8296,7 @@ emit_comparison(struct svga_shader_emitter_v10 *emit, { struct tgsi_full_src_register immediate; VGPU10OpcodeToken0 opcode0; - boolean swapSrc = FALSE; + bool swapSrc = false; /* Sanity checks for svga vs. gallium enums */ STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); @@ -7646,11 +8331,11 @@ emit_comparison(struct svga_shader_emitter_v10 *emit, break; case SVGA3D_CMP_LESSEQUAL: opcode0.opcodeType = VGPU10_OPCODE_GE; - swapSrc = TRUE; + swapSrc = true; break; case SVGA3D_CMP_GREATER: opcode0.opcodeType = VGPU10_OPCODE_LT; - swapSrc = TRUE; + swapSrc = true; break; case SVGA3D_CMP_NOTEQUAL: opcode0.opcodeType = VGPU10_OPCODE_NE; @@ -7788,8 +8473,8 @@ emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, */ struct tex_swizzle_info { - boolean swizzled; - boolean shadow_compare; + bool swizzled; + bool shadow_compare; unsigned unit; enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */ struct tgsi_full_src_register tmp_src; @@ -7810,7 +8495,7 @@ static void begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, unsigned unit, const struct tgsi_full_instruction *inst, - boolean shadow_compare, + bool shadow_compare, struct tex_swizzle_info *swz) { swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || @@ -7834,7 +8519,7 @@ begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, swz->inst_dst = &inst->Dst[0]; swz->coord_src = &inst->Src[0]; - emit->fs.shadow_compare_units |= shadow_compare << unit; + emit->shadow_compare_units |= shadow_compare << unit; } @@ -7883,7 +8568,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit, /* AND dest, tmp, {1.0} */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); + emit_opcode(emit, VGPU10_OPCODE_AND, false); if (swz->swizzled) { emit_dst_register(emit, &swz->tmp_dst); } @@ -7901,7 +8586,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit, unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; unsigned writemask_0 = 0, writemask_1 = 0; - boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); + bool int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); /* Swizzle w/out zero/one terms */ struct tgsi_full_src_register src_swizzled = @@ -7957,7 +8642,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit, /** * Emit code for TGSI_OPCODE_SAMPLE instruction. */ -static boolean +static bool emit_sample(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -7967,7 +8652,7 @@ emit_sample(struct svga_shader_emitter_v10 *emit, int offsets[3]; struct tex_swizzle_info swz_info; - begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, sampler_unit, inst, false, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -7991,7 +8676,7 @@ emit_sample(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } @@ -8003,18 +8688,18 @@ emit_sample(struct svga_shader_emitter_v10 *emit, * MOV dst, {1,1,1,1}; * \return TRUE if valid, FALSE if invalid. */ -static boolean +static bool is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { const unsigned unit = inst->Src[1].Register.Index; const enum tgsi_texture_type target = inst->Texture.Texture; - boolean valid = TRUE; + bool valid = true; if (tgsi_is_shadow_target(target) && is_integer_type(emit->sampler_return_type[unit])) { debug_printf("Invalid SAMPLE_C with an integer texture!\n"); - valid = FALSE; + valid = false; } /* XXX might check for other conditions in the future here */ @@ -8022,7 +8707,7 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, /* emit a MOV dst, {1,1,1,1} instruction. */ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_opcode(emit, VGPU10_OPCODE_MOV, false); emit_dst_register(emit, &inst->Dst[0]); emit_src_register(emit, &one); end_emit_instruction(emit); @@ -8035,7 +8720,7 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, /** * Emit code for TGSI_OPCODE_TEX (simple texture lookup) */ -static boolean +static bool emit_tex(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8045,12 +8730,16 @@ emit_tex(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register coord; int offsets[3]; struct tex_swizzle_info swz_info; + bool compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + return true; + + compare_in_shader = tgsi_is_shadow_target(target) && + emit->key.tex[unit].compare_in_shader; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8059,7 +8748,7 @@ emit_tex(struct svga_shader_emitter_v10 *emit, /* SAMPLE dst, coord(s0), resource, sampler */ begin_emit_instruction(emit); - if (tgsi_is_shadow_target(target)) + if (tgsi_is_shadow_target(target) && !compare_in_shader) opcode = VGPU10_OPCODE_SAMPLE_C; else opcode = VGPU10_OPCODE_SAMPLE; @@ -8078,13 +8767,13 @@ emit_tex(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) */ -static boolean +static bool emit_tg4(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8095,7 +8784,7 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + return true; if (emit->version >= 50) { unsigned target = inst->Texture.Texture; @@ -8129,12 +8818,12 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, if (select_swizzle == PIPE_SWIZZLE_1) { src = make_immediate_reg_float(emit, 1.0); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); - return TRUE; + return true; } else if (select_swizzle == PIPE_SWIZZLE_0) { src = make_immediate_reg_float(emit, 0.0); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); - return TRUE; + return true; } src = setup_texcoord(emit, unit, &inst->Src[0]); @@ -8182,7 +8871,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, emit_resource_register(emit, unit); /* sampler */ - sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler = make_src_reg(TGSI_FILE_SAMPLER, + emit->key.tex[unit].sampler_index); sampler.Register.SwizzleX = sampler.Register.SwizzleY = sampler.Register.SwizzleZ = @@ -8222,7 +8912,8 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, emit_resource_register(emit, unit); /* sampler */ - sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); + sampler = make_src_reg(TGSI_FILE_SAMPLER, + emit->key.tex[unit].sampler_index); sampler.Register.SwizzleX = sampler.Register.SwizzleY = sampler.Register.SwizzleZ = @@ -8246,7 +8937,7 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, } } - return TRUE; + return true; } @@ -8254,7 +8945,7 @@ emit_tg4(struct svga_shader_emitter_v10 *emit, /** * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) */ -static boolean +static bool emit_tex2(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8263,12 +8954,20 @@ emit_tex2(struct svga_shader_emitter_v10 *emit, struct tgsi_full_src_register coord, ref; int offsets[3]; struct tex_swizzle_info swz_info; + VGPU10_OPCODE_TYPE opcode; + bool compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + return true; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + compare_in_shader = emit->key.tex[unit].compare_in_shader; + if (compare_in_shader) + opcode = VGPU10_OPCODE_SAMPLE; + else + opcode = VGPU10_OPCODE_SAMPLE_C; + + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8277,27 +8976,29 @@ emit_tex2(struct svga_shader_emitter_v10 *emit, /* SAMPLE_C dst, coord, resource, sampler, ref */ begin_emit_instruction(emit); - emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, + emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); emit_src_register(emit, &coord); emit_resource_register(emit, unit); emit_sampler_register(emit, unit); - emit_tex_compare_refcoord(emit, target, &ref); + if (opcode == VGPU10_OPCODE_SAMPLE_C) { + emit_tex_compare_refcoord(emit, target, &ref); + } end_emit_instruction(emit); end_tex_swizzle(emit, &swz_info); free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TXP (projective texture) */ -static boolean +static bool emit_txp(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8312,12 +9013,16 @@ emit_txp(struct svga_shader_emitter_v10 *emit, scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); struct tgsi_full_src_register coord; struct tex_swizzle_info swz_info; + bool compare_in_shader; /* check that the sampler returns a float */ if (!is_valid_tex_instruction(emit, inst)) - return TRUE; + return true; + + compare_in_shader = tgsi_is_shadow_target(target) && + emit->key.tex[unit].compare_in_shader; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8330,7 +9035,7 @@ emit_txp(struct svga_shader_emitter_v10 *emit, /* SAMPLE dst, coord(tmp), resource, sampler */ begin_emit_instruction(emit); - if (tgsi_is_shadow_target(target)) + if (tgsi_is_shadow_target(target) && !compare_in_shader) /* NOTE: for non-fragment shaders, we should use * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. */ @@ -8352,14 +9057,14 @@ emit_txp(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TXD (explicit derivatives) */ -static boolean +static bool emit_txd(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8392,24 +9097,24 @@ emit_txd(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TXF (texel fetch) */ -static boolean +static bool emit_txf(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { const uint unit = inst->Src[1].Register.Index; - const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) + const bool msaa = tgsi_is_msaa_target(inst->Texture.Texture) && emit->key.tex[unit].num_samples > 1; int offsets[3]; struct tex_swizzle_info swz_info; - begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); + begin_tex_swizzle(emit, unit, inst, false, &swz_info); get_texel_offsets(emit, inst, offsets); @@ -8445,7 +9150,7 @@ emit_txf(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } @@ -8453,7 +9158,7 @@ emit_txf(struct svga_shader_emitter_v10 *emit, * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) * or TGSI_OPCODE_TXB2 (for cube shadow maps). */ -static boolean +static bool emit_txl_txb(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8504,14 +9209,14 @@ emit_txl_txb(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. */ -static boolean +static bool emit_txl2(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8548,14 +9253,14 @@ emit_txl2(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. */ -static boolean +static bool emit_txq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8582,7 +9287,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } @@ -8604,6 +9309,7 @@ opcode_has_dbl_dst(unsigned opcode) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: + case TGSI_OPCODE_DFMA: // XXX more TBD return true; default: @@ -8629,7 +9335,21 @@ opcode_has_dbl_src(unsigned opcode) case TGSI_OPCODE_DNEG: case TGSI_OPCODE_D2I: case TGSI_OPCODE_D2U: - // XXX more TBD + case TGSI_OPCODE_DFMA: + case TGSI_OPCODE_DSLT: + case TGSI_OPCODE_DSGE: + case TGSI_OPCODE_DSEQ: + case TGSI_OPCODE_DSNE: + case TGSI_OPCODE_DRCP: + case TGSI_OPCODE_DSQRT: + case TGSI_OPCODE_DMAD: + case TGSI_OPCODE_DLDEXP: + case TGSI_OPCODE_DRSQ: + case TGSI_OPCODE_DTRUNC: + case TGSI_OPCODE_DCEIL: + case TGSI_OPCODE_DFLR: + case TGSI_OPCODE_DROUND: + case TGSI_OPCODE_DSSG: return true; default: return false; @@ -8639,22 +9359,35 @@ opcode_has_dbl_src(unsigned opcode) /** * Check that the swizzle for reading from a double-precision register - * is valid. + * is valid. If not valid, move the source to a temporary register first. */ -static void -check_double_src_swizzle(const struct tgsi_full_src_register *reg) +static struct tgsi_full_src_register +check_double_src(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_src_register *reg) { - assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X && - reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || - (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && - reg->Register.SwizzleY == PIPE_SWIZZLE_W)); + struct tgsi_full_src_register src; - assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && - reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || - (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && - reg->Register.SwizzleW == PIPE_SWIZZLE_W)); -} + if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X && + reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || + (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && + reg->Register.SwizzleY == PIPE_SWIZZLE_W)) && + ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && + reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || + (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && + reg->Register.SwizzleW == PIPE_SWIZZLE_W))) { + src = *reg; + } else { + /* move the src to a temporary to fix the swizzle */ + unsigned tmp = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg); + src = tmp_src; + /* The temporary index will be released in the caller */ + } + return src; +} /** * Check that the writemask for a double-precision instruction is valid. @@ -8676,6 +9409,7 @@ check_double_dst_writemask(const struct tgsi_full_instruction *inst) case TGSI_OPCODE_DRCP: case TGSI_OPCODE_DSQRT: case TGSI_OPCODE_F2D: + case TGSI_OPCODE_DFMA: assert(writemask == TGSI_WRITEMASK_XYZW || writemask == TGSI_WRITEMASK_XY || writemask == TGSI_WRITEMASK_ZW); @@ -8699,54 +9433,57 @@ check_double_dst_writemask(const struct tgsi_full_instruction *inst) /** * Double-precision absolute value. */ -static boolean +static bool emit_dabs(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { assert(emit->version >= 50); - check_double_src_swizzle(&inst->Src[0]); + + struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); check_double_dst_writemask(inst); - struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]); + struct tgsi_full_src_register abs_src = absolute_src(&src); /* DMOV dst, |src| */ emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src); - return TRUE; + free_temp_indexes(emit); + return true; } /** * Double-precision negation */ -static boolean +static bool emit_dneg(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { assert(emit->version >= 50); - check_double_src_swizzle(&inst->Src[0]); + struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); check_double_dst_writemask(inst); - struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); + struct tgsi_full_src_register neg_src = negate_src(&src); /* DMOV dst, -src */ emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src); - return TRUE; + free_temp_indexes(emit); + return true; } /** * SM5 has no DMAD opcode. Implement negation with DMUL/DADD. */ -static boolean +static bool emit_dmad(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { assert(emit->version >= 50); - check_double_src_swizzle(&inst->Src[0]); - check_double_src_swizzle(&inst->Src[1]); - check_double_src_swizzle(&inst->Src[2]); + struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]); + struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]); + struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]); check_double_dst_writemask(inst); unsigned tmp = get_temp_index(emit); @@ -8755,23 +9492,23 @@ emit_dmad(struct svga_shader_emitter_v10 *emit, /* DMUL tmp, src[0], src[1] */ emit_instruction_opn(emit, VGPU10_OPCODE_DMUL, - &tmp_dst, &inst->Src[0], &inst->Src[1], NULL, - FALSE, inst->Instruction.Precise); + &tmp_dst, &src0, &src1, NULL, + false, inst->Instruction.Precise); /* DADD dst, tmp, src[2] */ emit_instruction_opn(emit, VGPU10_OPCODE_DADD, - &inst->Dst[0], &tmp_src, &inst->Src[2], NULL, + &inst->Dst[0], &tmp_src, &src2, NULL, inst->Instruction.Saturate, inst->Instruction.Precise); free_temp_indexes(emit); - return TRUE; + return true; } /** * Double precision reciprocal square root */ -static boolean +static bool emit_drsq(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src) @@ -8779,21 +9516,21 @@ emit_drsq(struct svga_shader_emitter_v10 *emit, assert(emit->version >= 50); VGPU10OpcodeToken0 token0; + struct tgsi_full_src_register dsrc = check_double_src(emit, src); + begin_emit_instruction(emit); token0.value = 0; token0.opcodeType = VGPU10_OPCODE_VMWARE; token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ; emit_dword(emit, token0.value); - emit_dst_register(emit, dst); - - check_double_src_swizzle(src); - emit_src_register(emit, src); - + emit_src_register(emit, &dsrc); end_emit_instruction(emit); - return TRUE; + free_temp_indexes(emit); + + return true; } @@ -8802,13 +9539,13 @@ emit_drsq(struct svga_shader_emitter_v10 *emit, * It will be implemented with DRSQ. * dst = src * DRSQ(src) */ -static boolean +static bool emit_dsqrt(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { assert(emit->version >= 50); - check_double_src_swizzle(&inst->Src[0]); + struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); /* temporary register to hold the source */ unsigned tmp = get_temp_index(emit); @@ -8843,9 +9580,9 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit, make_immediate_reg_double(emit, 1.0); emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy, - &zero, &inst->Src[0]); + &zero, &src); emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst, - &tmp_cond_src_xy, &one, &inst->Src[0]); + &tmp_cond_src_xy, &one, &src); struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp); struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp); @@ -8855,15 +9592,103 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit, /* DMUL dst, tmp_rsq, src[0] */ emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0], - &tmp_rsq_src, &inst->Src[0]); + &tmp_rsq_src, &src); free_temp_indexes(emit); - return TRUE; + return true; +} + + +/** + * glsl-nir path does not lower DTRUNC, so we need to + * add the translation here. + * + * frac = DFRAC(src) + * tmp = src - frac + * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1)) + */ +static bool +emit_dtrunc(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + assert(emit->version >= 50); + + struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]); + + /* frac = DFRAC(src) */ + unsigned frac_index = get_temp_index(emit); + struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index); + struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index); + + VGPU10OpcodeToken0 token0; + begin_emit_instruction(emit); + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_VMWARE; + token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC; + emit_dword(emit, token0.value); + emit_dst_register(emit, &frac_dst); + emit_src_register(emit, &src); + end_emit_instruction(emit); + + /* tmp = src - frac */ + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src); + emit_instruction_opn(emit, VGPU10_OPCODE_DADD, + &tmp_dst, &src, &negate_frac_src, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* cond = frac==0 */ + unsigned cond_index = get_temp_index(emit); + struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index); + struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index); + struct tgsi_full_src_register zero = + make_immediate_reg_double(emit, 0); + + /* Only use one or two components for double opcode */ + cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); + + emit_instruction_opn(emit, VGPU10_OPCODE_DEQ, + &cond_dst, &frac_src, &zero, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* tmp2 = cond ? 0 : 1 */ + unsigned tmp2_index = get_temp_index(emit); + struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index); + struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index); + struct tgsi_full_src_register cond_src_xy = + swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); + struct tgsi_full_src_register one = + make_immediate_reg_double(emit, 1.0); + + emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, + &tmp2_dst, &cond_src_xy, &zero, &one, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* tmp2 = tmp + tmp2 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DADD, + &tmp2_dst, &tmp_src, &tmp2_src, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* cond = src>=0 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DGE, + &cond_dst, &src, &zero, NULL, + inst->Instruction.Saturate, inst->Instruction.Precise); + + /* dst = cond ? tmp : tmp2 */ + emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC, + &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src, + inst->Instruction.Saturate, inst->Instruction.Precise); + + free_temp_indexes(emit); + return true; } -static boolean +static bool emit_interp_offset(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8895,14 +9720,14 @@ emit_interp_offset(struct svga_shader_emitter_v10 *emit, free_temp_indexes(emit); - return TRUE; + return true; } /** * Emit a simple instruction (like ADD, MUL, MIN, etc). */ -static boolean +static bool emit_simple(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -8912,6 +9737,8 @@ emit_simple(struct svga_shader_emitter_v10 *emit, const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); unsigned i; + struct tgsi_full_src_register src[3]; + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { emit->current_loop_depth++; } @@ -8919,6 +9746,13 @@ emit_simple(struct svga_shader_emitter_v10 *emit, emit->current_loop_depth--; } + for (i = 0; i < op->num_src; i++) { + if (dbl_src) + src[i] = check_double_src(emit, &inst->Src[i]); + else + src[i] = inst->Src[i]; + } + begin_emit_instruction(emit); emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode), inst->Instruction.Saturate, @@ -8930,14 +9764,12 @@ emit_simple(struct svga_shader_emitter_v10 *emit, emit_dst_register(emit, &inst->Dst[i]); } for (i = 0; i < op->num_src; i++) { - if (dbl_src) { - check_double_src_swizzle(&inst->Src[i]); - } - emit_src_register(emit, &inst->Src[i]); + emit_src_register(emit, &src[i]); } end_emit_instruction(emit); - return TRUE; + free_temp_indexes(emit); + return true; } @@ -8949,7 +9781,7 @@ emit_simple(struct svga_shader_emitter_v10 *emit, * To get correct location as per glsl from SM5 device, we should * return (31 - index) if returned index is not -1. */ -static boolean +static bool emit_msb(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -9004,7 +9836,7 @@ emit_msb(struct svga_shader_emitter_v10 *emit, src_swizzle = src_swizzle + 1; } free_temp_indexes(emit); - return TRUE; + return true; } @@ -9017,7 +9849,7 @@ emit_msb(struct svga_shader_emitter_v10 *emit, * Note: SM5 has width & offset range (0-31); * whereas GLSL has width & offset range (0-32) */ -static boolean +static bool emit_bfe(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -9075,7 +9907,7 @@ emit_bfe(struct svga_shader_emitter_v10 *emit, emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); free_temp_indexes(emit); - return TRUE; + return true; } @@ -9088,7 +9920,7 @@ emit_bfe(struct svga_shader_emitter_v10 *emit, * Note: SM5 has width & offset range (0-31); * whereas GLSL has width & offset range (0-32) */ -static boolean +static bool emit_bfi(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -9153,7 +9985,7 @@ emit_bfi(struct svga_shader_emitter_v10 *emit, emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); free_temp_indexes(emit); - return TRUE; + return true; } @@ -9161,7 +9993,7 @@ emit_bfi(struct svga_shader_emitter_v10 *emit, * We only special case the MOV instruction to try to detect constant * color writes in the fragment shader. */ -static boolean +static bool emit_mov(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { @@ -9173,7 +10005,7 @@ emit_mov(struct svga_shader_emitter_v10 *emit, dst->Register.Index == 0 && src->Register.File == TGSI_FILE_CONSTANT && !src->Register.Indirect) { - emit->constant_color_output = TRUE; + emit->constant_color_output = true; } return emit_simple(emit, inst); @@ -9184,7 +10016,7 @@ emit_mov(struct svga_shader_emitter_v10 *emit, * Emit a simple VGPU10 instruction which writes to multiple dest registers, * where TGSI only uses one dest register. */ -static boolean +static bool emit_simple_1dst(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst, unsigned dst_count, @@ -9210,14 +10042,14 @@ emit_simple_1dst(struct svga_shader_emitter_v10 *emit, } end_emit_instruction(emit); - return TRUE; + return true; } /** * Emit a vmware specific VGPU10 instruction. */ -static boolean +static bool emit_vmware(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst, VGPU10_VMWARE_OPCODE_TYPE subopcode) @@ -9227,8 +10059,15 @@ emit_vmware(struct svga_shader_emitter_v10 *emit, const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); - unsigned i; + struct tgsi_full_src_register src[3]; + + for (i = 0; i < op->num_src; i++) { + if (dbl_src) + src[i] = check_double_src(emit, &inst->Src[i]); + else + src[i] = inst->Src[i]; + } begin_emit_instruction(emit); @@ -9253,29 +10092,894 @@ emit_vmware(struct svga_shader_emitter_v10 *emit, } for (i = 0; i < op->num_src; i++) { - if (dbl_src) { - check_double_src_swizzle(&inst->Src[i]); + emit_src_register(emit, &src[i]); + } + end_emit_instruction(emit); + + free_temp_indexes(emit); + return true; +} + +/** + * Emit a memory register + */ + +typedef enum { + MEM_STORE = 0, + MEM_LOAD = 1, + MEM_ATOMIC_COUNTER +} memory_op; + +static void +emit_memory_register(struct svga_shader_emitter_v10 *emit, + memory_op mem_op, + const struct tgsi_full_instruction *inst, + unsigned regIndex, unsigned writemask) +{ + VGPU10OperandToken0 operand0; + unsigned resIndex = 0; + + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + switch (mem_op) { + case MEM_ATOMIC_COUNTER: + { + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + resIndex = inst->Src[regIndex].Register.Index; + break; + } + case MEM_STORE: + { + const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex]; + + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = writemask; + resIndex = reg->Register.Index; + break; + } + case MEM_LOAD: + { + const struct tgsi_full_src_register *reg = &inst->Src[regIndex]; + + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = reg->Register.SwizzleX; + operand0.swizzleY = reg->Register.SwizzleY; + operand0.swizzleZ = reg->Register.SwizzleZ; + operand0.swizzleW = reg->Register.SwizzleW; + resIndex = reg->Register.Index; + break; + } + default: + assert(!"Unexpected memory opcode"); + break; + } + + emit_dword(emit, operand0.value); + emit_dword(emit, resIndex); +} + + +typedef enum { + UAV_STORE = 0, + UAV_LOAD = 1, + UAV_ATOMIC = 2, + UAV_RESQ = 3, +} UAV_OP; + + +/** + * Emit a uav register + * \param uav_index index of resource register + * \param uav_op UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode + * \param resourceType resource file type + * \param writemask resource writemask + */ + +static void +emit_uav_register(struct svga_shader_emitter_v10 *emit, + unsigned res_index, UAV_OP uav_op, + enum tgsi_file_type resourceType, unsigned writemask) +{ + VGPU10OperandToken0 operand0; + unsigned uav_index = INVALID_INDEX; + + operand0.value = 0; + operand0.operandType = VGPU10_OPERAND_TYPE_UAV; + operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; + operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; + + switch (resourceType) { + case TGSI_FILE_IMAGE: + uav_index = emit->key.images[res_index].uav_index; + break; + case TGSI_FILE_BUFFER: + uav_index = emit->key.shader_buf_uav_index[res_index]; + break; + case TGSI_FILE_HW_ATOMIC: + uav_index = emit->key.atomic_buf_uav_index[res_index]; + break; + default: + assert(0); + } + + switch (uav_op) { + case UAV_ATOMIC: + operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; + break; + + case UAV_STORE: + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; + operand0.mask = writemask; + break; + + case UAV_LOAD: + case UAV_RESQ: + operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; + operand0.swizzleX = VGPU10_COMPONENT_X; + operand0.swizzleY = VGPU10_COMPONENT_Y; + operand0.swizzleZ = VGPU10_COMPONENT_Z; + operand0.swizzleW = VGPU10_COMPONENT_W; + break; + + default: + break; + } + + emit_dword(emit, operand0.value); + emit_dword(emit, uav_index); +} + + +/** + * A helper function to emit the uav address. + * For memory, buffer, and image resource, it is set to the specified address. + * For HW atomic counter, the address is the sum of the address offset and the + * offset into the HW atomic buffer as specified by the register index. + * It is also possible to specify the counter index as an indirect address. + * And in this case, the uav address will be the sum of the address offset and the + * counter index specified in the indirect address. + */ +static +struct tgsi_full_src_register +emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit, + enum tgsi_file_type resourceType, + unsigned resourceIndex, + unsigned resourceIndirect, + unsigned resourceIndirectIndex, + const struct tgsi_full_src_register *addr_reg) +{ + unsigned addr_tmp; + struct tgsi_full_dst_register addr_dst; + struct tgsi_full_src_register addr_src; + struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2); + struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0); + + addr_tmp = get_temp_index(emit); + addr_dst = make_dst_temp_reg(addr_tmp); + addr_src = make_src_temp_reg(addr_tmp); + + /* specified address offset */ + if (addr_reg) + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg); + else + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero); + + /* For HW atomic counter, we need to find the index to the + * HW atomic buffer. + */ + if (resourceType == TGSI_FILE_HW_ATOMIC) { + if (resourceIndirect) { + + /** + * uav addr offset = counter layout offset + + * counter indirect index address + address offset + */ + + /* counter layout offset */ + struct tgsi_full_src_register layout_offset; + layout_offset = + make_immediate_reg_int(emit, resourceIndex); + + /* counter layout offset + address offset */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, + &addr_src, &layout_offset); + + /* counter indirect index address */ + unsigned indirect_addr = + emit->address_reg_index[resourceIndirectIndex]; + + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* counter layout offset + address offset + counter indirect address */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst, + &addr_src, &indirect_addr_src); + + } else { + struct tgsi_full_src_register index_src; + + index_src = make_immediate_reg_int(emit, resourceIndex); + + /* uav addr offset = counter index address + address offset */ + emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst, + &addr_src, &index_src); } - emit_src_register(emit, &inst->Src[i]); + + /* HW atomic buffer is declared as raw buffer, so the buffer address is + * the byte offset, so we need to multiple the counter addr offset by 4. + */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst, + &addr_src, &two); + } + else if (resourceType == TGSI_FILE_IMAGE) { + if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D) + && emit->key.images[resourceIndex].is_single_layer) { + + struct tgsi_full_dst_register addr_dst_z = + writemask_dst(&addr_dst, TGSI_WRITEMASK_Z); + + /* For non-layered 3D texture image view, we have to make sure the z + * component of the address offset is set to 0. + */ + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z, + &zero); + } + } + + return addr_src; +} + + + +/** + * A helper function to expand indirect indexing to uav resource + * by looping through the resource array, compare the indirect index and + * emit the instruction for each resource in the array. + */ +static void +loop_instruction(unsigned index, unsigned count, + struct tgsi_full_src_register *addr_index, + void (*fb)(struct svga_shader_emitter_v10 *, + const struct tgsi_full_instruction *, unsigned), + struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + if (count == 0) + return; + + if (index > 0) { + /* ELSE */ + emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); + } + + struct tgsi_full_src_register index_src = + make_immediate_reg_int(emit, index); + + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_src_register tmp_src_x = + scalar_src(&tmp_src, TGSI_SWIZZLE_X); + struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index); + + /* IEQ tmp, addr_tmp_index, index */ + emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst, + addr_index, &index_src); + + /* IF tmp */ + emit_if(emit, &tmp_src_x); + + free_temp_indexes(emit); + + (*fb)(emit, inst, index); + + loop_instruction(index+1, count-1, addr_index, fb, emit, inst); + + /* ENDIF */ + emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); +} + + +/** + * A helper function to emit the load instruction. + */ +static void +emit_load_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + struct tgsi_full_src_register addr_src; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + + /* Resolve the resource address for this resource first */ + addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex, + inst->Src[0].Register.Indirect, + inst->Src[0].Indirect.Index, + &inst->Src[1]); + + /* LOAD resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + + if (resourceType == TGSI_FILE_MEMORY || + resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_HW_ATOMIC) { + token0.opcodeType = VGPU10_OPCODE_LD_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + } + else { + token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; } + + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &addr_src); + + if (resourceType == TGSI_FILE_MEMORY) { + emit_memory_register(emit, MEM_LOAD, inst, 0, 0); + } else if (resourceType == TGSI_FILE_HW_ATOMIC) { + emit_uav_register(emit, inst->Src[0].Dimension.Index, + UAV_LOAD, inst->Src[0].Register.File, 0); + } else if (resourceType == TGSI_FILE_BUFFER) { + if (emit->raw_shaderbufs & (1 << resourceIndex)) + emit_resource_register(emit, resourceIndex + + emit->raw_shaderbuf_srv_start_index); + else + emit_uav_register(emit, resourceIndex, + UAV_LOAD, inst->Src[0].Register.File, 0); + } else { + emit_uav_register(emit, resourceIndex, + UAV_LOAD, inst->Src[0].Register.File, 0); + } + end_emit_instruction(emit); - return TRUE; + free_temp_indexes(emit); } /** - * Translate a single TGSI instruction to VGPU10. + * Emit uav / memory load instruction */ -static boolean -emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, - unsigned inst_number, - const struct tgsi_full_instruction *inst) +static bool +emit_load(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) { - const enum tgsi_opcode opcode = inst->Instruction.Opcode; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + unsigned resourceIndex = inst->Src[0].Register.Index; - if (emit->skip_instruction) - return TRUE; + /* If the resource register has indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. + */ + if (inst->Src[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Src[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Add offset to the indirect index */ + if (inst->Src[0].Register.Index != 0) { + struct tgsi_full_src_register offset = + make_immediate_reg_int(emit, inst->Src[0].Register.Index); + struct tgsi_full_dst_register indirect_addr_dst = + make_dst_temp_reg(indirect_addr); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, + &indirect_addr_src, &offset); + } + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_load_instruction, emit, inst); + } + else { + emit_load_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return true; +} + + +/** + * A helper function to emit a store instruction. + */ +static void +emit_store_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + enum tgsi_file_type resourceType = inst->Dst[0].Register.File; + unsigned writemask = inst->Dst[0].Register.WriteMask; + struct tgsi_full_src_register addr_src; + + unsigned tmp_index = get_temp_index(emit); + struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index); + struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index); + struct tgsi_full_dst_register tmp_dst; + + struct tgsi_full_src_register src = inst->Src[1]; + struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); + + bool needLoad = false; + bool needPerComponentStore = false; + unsigned swizzles = 0; + + /* Resolve the resource address for this resource first */ + addr_src = emit_uav_addr_offset(emit, resourceType, + inst->Dst[0].Register.Index, + inst->Dst[0].Register.Indirect, + inst->Dst[0].Indirect.Index, + &inst->Src[0]); + + /* First check the writemask to see if it can be supported + * by the store instruction. + * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory, + * we can adjust the address offset, and do a per-component store. + * store_uav_typed only allows .xyzw. In this case, we need to + * do a load first, update the temporary and then issue the + * store. This does have a small risk that if different threads + * update different components of the same address, data might not be + * in sync. + */ + if (resourceType == TGSI_FILE_IMAGE) { + needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? false : true; + } + else if (resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_MEMORY) { + if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY || + writemask == TGSI_WRITEMASK_XYZ || + writemask == TGSI_WRITEMASK_XYZW)) { + needPerComponentStore = true; + } + } + + if (needLoad) { + assert(resourceType == TGSI_FILE_IMAGE); + + /* LOAD resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED; + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &tmp_dst_xyzw); + emit_src_register(emit, &addr_src); + emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0); + + end_emit_instruction(emit); + + /* MOV tmp(writemask) src */ + tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask); + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]); + + /* Now set the writemask to xyzw for the store_uav_typed instruction */ + writemask = TGSI_WRITEMASK_XYZW; + } + else if (needPerComponentStore) { + /* Save the src swizzles */ + swizzles = src.Register.SwizzleX | + src.Register.SwizzleY << 2 | + src.Register.SwizzleZ << 4 | + src.Register.SwizzleW << 6; + } + + bool storeDone = false; + unsigned perComponentWritemask = writemask; + unsigned shift = 0; + struct tgsi_full_src_register shift_src; + + while (!storeDone) { + + if (needPerComponentStore) { + assert(perComponentWritemask); + while (!(perComponentWritemask & TGSI_WRITEMASK_X)) { + shift++; + perComponentWritemask >>= 1; + } + + /* First adjust the addr_src to the next component */ + if (shift != 0) { + struct tgsi_full_dst_register addr_dst = + make_dst_temp_reg(addr_src.Register.Index); + shift_src = make_immediate_reg_int(emit, shift); + emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four, + &shift_src, &addr_src); + + /* Adjust the src swizzle as well */ + swizzles >>= (shift * 2); + } + + /* Now the address offset is set to the next component, + * we can set the writemask to .x and make sure to set + * the src swizzle as well. + */ + src.Register.SwizzleX = swizzles & 0x3; + writemask = TGSI_WRITEMASK_X; + + /* Shift for the next component check */ + perComponentWritemask >>= 1; + shift = 1; + } + + /* STORE resource, address, src */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.saturate = inst->Instruction.Saturate; + + if (resourceType == TGSI_FILE_MEMORY) { + token0.opcodeType = VGPU10_OPCODE_STORE_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + emit_dword(emit, token0.value); + emit_memory_register(emit, MEM_STORE, inst, 0, writemask); + } + else if (resourceType == TGSI_FILE_BUFFER || + resourceType == TGSI_FILE_HW_ATOMIC) { + token0.opcodeType = VGPU10_OPCODE_STORE_RAW; + addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X); + emit_dword(emit, token0.value); + emit_uav_register(emit, resourceIndex, UAV_STORE, + resourceType, writemask); + } + else { + token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED; + emit_dword(emit, token0.value); + emit_uav_register(emit, resourceIndex, UAV_STORE, + resourceType, writemask); + } + + emit_src_register(emit, &addr_src); + + if (needLoad) + emit_src_register(emit, &tmp_src); + else + emit_src_register(emit, &src); + + end_emit_instruction(emit); + + if (!needPerComponentStore || !perComponentWritemask) + storeDone = true; + } + + free_temp_indexes(emit); +} + + +/** + * Emit uav / memory store instruction + */ +static bool +emit_store(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + enum tgsi_file_type resourceType = inst->Dst[0].Register.File; + unsigned resourceIndex = inst->Dst[0].Register.Index; + + /* If the resource register has indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. + */ + if (inst->Dst[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Dst[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* Indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Add offset to the indirect index */ + if (inst->Dst[0].Register.Index != 0) { + struct tgsi_full_src_register offset = + make_immediate_reg_int(emit, inst->Dst[0].Register.Index); + struct tgsi_full_dst_register indirect_addr_dst = + make_dst_temp_reg(indirect_addr); + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst, + &indirect_addr_src, &offset); + } + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_store_instruction, emit, inst); + } + else { + emit_store_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return true; +} + + +/** + * A helper function to emit an atomic instruction. + */ + +static void +emit_atomic_instruction(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + unsigned resourceIndex) +{ + VGPU10OpcodeToken0 token0; + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + struct tgsi_full_src_register addr_src; + VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode; + const struct tgsi_full_src_register *offset; + + /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */ + offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1]; + + /* Resolve the resource address */ + addr_src = emit_uav_addr_offset(emit, resourceType, + inst->Src[0].Register.Index, + inst->Src[0].Register.Indirect, + inst->Src[0].Indirect.Index, + offset); + + /* Emit the atomic operation */ + begin_emit_instruction(emit); + + token0.value = 0; + token0.opcodeType = opcode; + token0.saturate = inst->Instruction.Saturate, + emit_dword(emit, token0.value); + + emit_dst_register(emit, &inst->Dst[0]); + + if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { + emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0); + } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) { + assert(inst->Src[0].Register.Dimension == 1); + emit_uav_register(emit, inst->Src[0].Dimension.Index, + UAV_ATOMIC, inst->Src[0].Register.File, 0); + } else { + emit_uav_register(emit, resourceIndex, + UAV_ATOMIC, inst->Src[0].Register.File, 0); + } + + /* resource address offset */ + emit_src_register(emit, &addr_src); + + struct tgsi_full_src_register src0_x = + swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + emit_src_register(emit, &src0_x); + + if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) { + struct tgsi_full_src_register src1_x = + swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); + + emit_src_register(emit, &src1_x); + } + + end_emit_instruction(emit); + + free_temp_indexes(emit); +} + + +/** + * Emit atomic instruction + */ +static bool +emit_atomic(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst, + VGPU10_OPCODE_TYPE opcode) +{ + enum tgsi_file_type resourceType = inst->Src[0].Register.File; + unsigned resourceIndex = inst->Src[0].Register.Index; + + emit->cur_atomic_opcode = opcode; + + /* If the resource register has indirect index, we will need + * to expand it since SM5 device does not support indirect indexing + * for uav. + */ + if (inst->Dst[0].Register.Indirect && + (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) { + + unsigned indirect_index = inst->Dst[0].Indirect.Index; + unsigned num_resources = + resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs : + emit->num_images; + + /* indirect index tmp register */ + unsigned indirect_addr = emit->address_reg_index[indirect_index]; + struct tgsi_full_src_register indirect_addr_src = + make_src_temp_reg(indirect_addr); + indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X); + + /* Loop through the resource array to find which resource to use. + */ + loop_instruction(0, num_resources, &indirect_addr_src, + emit_atomic_instruction, emit, inst); + } + else { + emit_atomic_instruction(emit, inst, resourceIndex); + } + + free_temp_indexes(emit); + + return true; +} + + +/** + * Emit barrier instruction + */ +static bool +emit_barrier(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + VGPU10OpcodeToken0 token0; + + assert(emit->version >= 50); + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_SYNC; + + if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) { + /* SM5 device doesn't support BARRIER in tcs . If barrier is used + * in shader, don't do anything for this opcode and continue rest + * of shader translation + */ + util_debug_message(&emit->svga_debug_callback, INFO, + "barrier instruction is not supported in tessellation control shader\n"); + return true; + } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + if (emit->cs.shared_memory_declared) + token0.syncThreadGroupShared = 1; + + if (emit->uav_declared) + token0.syncUAVMemoryGroup = 1; + + token0.syncThreadsInGroup = 1; + } else { + token0.syncUAVMemoryGlobal = 1; + } + + assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || + token0.syncThreadGroupShared); + + begin_emit_instruction(emit); + emit_dword(emit, token0.value); + end_emit_instruction(emit); + + return true; +} + +/** + * Emit memory barrier instruction + */ +static bool +emit_memory_barrier(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + unsigned index = inst->Src[0].Register.Index; + unsigned swizzle = inst->Src[0].Register.SwizzleX; + unsigned bartype = emit->immediates[index][swizzle].Int; + VGPU10OpcodeToken0 token0; + + token0.value = 0; + token0.opcodeType = VGPU10_OPCODE_SYNC; + + if (emit->unit == PIPE_SHADER_COMPUTE) { + + /* For compute shader, issue sync opcode with different options + * depending on the memory barrier type. + * + * Bit 0: Shader storage buffers + * Bit 1: Atomic buffers + * Bit 2: Images + * Bit 3: Shared memory + * Bit 4: Thread group + */ + + if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE)) + token0.syncUAVMemoryGlobal = 1; + else if (bartype & TGSI_MEMBAR_THREAD_GROUP) + token0.syncUAVMemoryGroup = 1; + + if (bartype & TGSI_MEMBAR_SHARED) + token0.syncThreadGroupShared = 1; + } + else { + /** + * For graphics stages, only sync_uglobal is available. + */ + if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE)) + token0.syncUAVMemoryGlobal = 1; + } + + assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup || + token0.syncThreadGroupShared); + + begin_emit_instruction(emit); + emit_dword(emit, token0.value); + end_emit_instruction(emit); + + return true; +} + + +/** + * Emit code for TGSI_OPCODE_RESQ (image size) instruction. + */ +static bool +emit_resq(struct svga_shader_emitter_v10 *emit, + const struct tgsi_full_instruction *inst) +{ + struct tgsi_full_src_register zero = + make_immediate_reg_int(emit, 0); + + unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource; + + if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) { + struct tgsi_full_src_register image_src; + + image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index); + + emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src); + return true; + } + + begin_emit_instruction(emit); + if (uav_resource == TGSI_TEXTURE_BUFFER) { + emit_opcode(emit, VGPU10_OPCODE_BUFINFO, false); + emit_dst_register(emit, &inst->Dst[0]); + } + else { + emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); + emit_dst_register(emit, &inst->Dst[0]); + emit_src_register(emit, &zero); + } + emit_uav_register(emit, inst->Src[0].Register.Index, + UAV_RESQ, inst->Src[0].Register.File, 0); + end_emit_instruction(emit); + + return true; +} + + +static bool +emit_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + const enum tgsi_opcode opcode = inst->Instruction.Opcode; switch (opcode) { case TGSI_OPCODE_ADD: @@ -9381,7 +11085,7 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_arl_uarl(emit, inst); case TGSI_OPCODE_BGNSUB: /* no-op */ - return TRUE; + return true; case TGSI_OPCODE_CAL: return emit_cal(emit, inst); case TGSI_OPCODE_CMP: @@ -9397,9 +11101,9 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, case TGSI_OPCODE_IF: return emit_if(emit, &inst->Src[0]); case TGSI_OPCODE_KILL: - return emit_kill(emit, inst); + return emit_discard(emit, inst); case TGSI_OPCODE_KILL_IF: - return emit_kill_if(emit, inst); + return emit_cond_discard(emit, inst); case TGSI_OPCODE_LG2: return emit_lg2(emit, inst); case TGSI_OPCODE_LIT: @@ -9496,63 +11200,115 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_simple(emit, inst); case TGSI_OPCODE_INTERP_OFFSET: return emit_interp_offset(emit, inst); + case TGSI_OPCODE_FMA: + case TGSI_OPCODE_DFMA: + return emit_simple(emit, inst); + + case TGSI_OPCODE_DTRUNC: + return emit_dtrunc(emit, inst); /* The following opcodes should never be seen here. We return zero - * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, - * FMA_SUPPORTED, LDEXP_SUPPORTED queries. + * for PIPE_CAP_TGSI_DROUND_SUPPORTED. */ - case TGSI_OPCODE_FMA: case TGSI_OPCODE_LDEXP: case TGSI_OPCODE_DSSG: - case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_DLDEXP: - case TGSI_OPCODE_DTRUNC: case TGSI_OPCODE_DCEIL: case TGSI_OPCODE_DFLR: debug_printf("Unexpected TGSI opcode %s. " "Should have been translated away by the GLSL compiler.\n", tgsi_get_opcode_name(opcode)); - return FALSE; + return false; case TGSI_OPCODE_LOAD: + return emit_load(emit, inst); + case TGSI_OPCODE_STORE: + return emit_store(emit, inst); + case TGSI_OPCODE_ATOMAND: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND); + case TGSI_OPCODE_ATOMCAS: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH); + case TGSI_OPCODE_ATOMIMAX: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX); + case TGSI_OPCODE_ATOMIMIN: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN); + case TGSI_OPCODE_ATOMOR: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR); + case TGSI_OPCODE_ATOMUADD: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD); + case TGSI_OPCODE_ATOMUMAX: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX); + case TGSI_OPCODE_ATOMUMIN: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN); + case TGSI_OPCODE_ATOMXCHG: + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH); + case TGSI_OPCODE_ATOMXOR: - return FALSE; + return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR); + case TGSI_OPCODE_BARRIER: - if (emit->unit == PIPE_SHADER_TESS_CTRL) { - /* SM5 device doesn't support BARRIER in tcs . If barrier is used - * in shader, don't do anything for this opcode and continue rest - * of shader translation - */ - pipe_debug_message(&emit->svga_debug_callback, INFO, - "barrier instruction is not supported in tessellation control shader\n"); - return TRUE; - } - else { - return emit_simple(emit, inst); - } + return emit_barrier(emit, inst); + + case TGSI_OPCODE_MEMBAR: + return emit_memory_barrier(emit, inst); + + case TGSI_OPCODE_RESQ: + return emit_resq(emit, inst); case TGSI_OPCODE_END: if (!emit_post_helpers(emit)) - return FALSE; + return false; return emit_simple(emit, inst); default: debug_printf("Unimplemented tgsi instruction %s\n", tgsi_get_opcode_name(opcode)); - return FALSE; + return false; } - return TRUE; + return true; +} + + +/** + * Translate a single TGSI instruction to VGPU10. + */ +static bool +emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + if (emit->skip_instruction) + return true; + + bool ret = true; + unsigned start_token = emit_get_num_tokens(emit); + + emit->reemit_tgsi_instruction = false; + + ret = emit_instruction(emit, inst_number, inst); + + if (emit->reemit_tgsi_instruction) { + /** + * Reset emit->ptr to where the translation of this tgsi instruction + * started. + */ + VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; + emit->ptr = (char *) (tokens + start_token); + + emit->reemit_tgsi_instruction = false; + } + return ret; } @@ -9686,7 +11442,7 @@ emit_vpos_instructions(struct svga_shader_emitter_v10 *emit) /* MOV pos, tmp_pos */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_opcode(emit, VGPU10_OPCODE_MOV, false); emit_dst_register(emit, &pos_dst); emit_src_register(emit, &tmp_pos_src); end_emit_instruction(emit); @@ -9753,11 +11509,11 @@ emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) /** * Translate the TGSI_OPCODE_EMIT GS instruction. */ -static boolean +static bool emit_vertex(struct svga_shader_emitter_v10 *emit, const struct tgsi_full_instruction *inst) { - unsigned ret = TRUE; + unsigned ret = true; assert(emit->unit == PIPE_SHADER_GEOMETRY); @@ -9811,15 +11567,15 @@ emit_vertex(struct svga_shader_emitter_v10 *emit, /** * If there is no output for this stream, discard this instruction. */ - emit->discard_instruction = TRUE; + emit->discard_instruction = true; } else { - emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE); + emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, false); emit_stream_register(emit, streamIndex); } } else { - emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); + emit_opcode(emit, VGPU10_OPCODE_EMIT, false); } end_emit_instruction(emit); @@ -9849,7 +11605,7 @@ emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); + emit_opcode(emit, VGPU10_OPCODE_MOVC, false); emit_dst_register(emit, &tmp_dst); emit_face_register(emit); emit_src_register(emit, &one); @@ -9886,14 +11642,14 @@ emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); + emit_opcode(emit, VGPU10_OPCODE_MOV, false); emit_dst_register(emit, &tmp_dst_xyz); emit_src_register(emit, &fragcoord); end_emit_instruction(emit); /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); + emit_opcode(emit, VGPU10_OPCODE_DIV, false); emit_dst_register(emit, &tmp_dst_w); emit_src_register(emit, &one); emit_src_register(emit, &fragcoord); @@ -9936,7 +11692,7 @@ emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) /* SAMPLE_POS dst, RASTERIZER, sampleIndex */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE); + emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, false); emit_dst_register(emit, &tmp_dst); emit_rasterizer_register(emit); emit_src_register(emit, &sample_index_reg); @@ -9945,7 +11701,7 @@ emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) /* Convert from D3D coords to GL coords by adding 0.5 bias */ /* ADD dst, dst, half */ begin_emit_instruction(emit); - emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE); + emit_opcode(emit, VGPU10_OPCODE_ADD, false); emit_dst_register(emit, &tmp_dst); emit_src_register(emit, &tmp_src); emit_src_register(emit, &half); @@ -10213,8 +11969,8 @@ emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit) unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index; emit->vs.vertex_id_tmp_index = INVALID_INDEX; emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst, - &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE, - FALSE); + &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, false, + false); emit->vs.vertex_id_tmp_index = vertex_id_tmp_index; } @@ -10232,50 +11988,61 @@ emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) { assert(emit->unit == PIPE_SHADER_TESS_CTRL); assert(emit->tcs.control_point_phase); - assert(emit->tcs.control_point_input_index != INVALID_INDEX); assert(emit->tcs.control_point_out_index != INVALID_INDEX); assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); - /* UARL ADDR[INDEX].x INVOCATION.xxxx */ + struct tgsi_full_dst_register output_control_point; + output_control_point = + make_dst_output_reg(emit->tcs.control_point_out_index); - struct tgsi_full_src_register invocation_src; - struct tgsi_full_dst_register addr_dst; - struct tgsi_full_dst_register addr_dst_x; - unsigned addr_tmp; + if (emit->tcs.control_point_input_index == INVALID_INDEX) { + /* MOV OUTPUT 0.0f */ + struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false); + emit_dst_register(emit, &output_control_point); + emit_src_register(emit, &zero); + end_emit_instruction(emit); + } + else { + /* UARL ADDR[INDEX].x INVOCATION.xxxx */ - addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; - addr_dst = make_dst_temp_reg(addr_tmp); - addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); + struct tgsi_full_src_register invocation_src; + struct tgsi_full_dst_register addr_dst; + struct tgsi_full_dst_register addr_dst_x; + unsigned addr_tmp; - invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, - emit->tcs.invocation_id_sys_index); + addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; + addr_dst = make_dst_temp_reg(addr_tmp); + addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); - begin_emit_instruction(emit); - emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); - emit_dst_register(emit, &addr_dst_x); - emit_src_register(emit, &invocation_src); - end_emit_instruction(emit); + invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, + emit->tcs.invocation_id_sys_index); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false); + emit_dst_register(emit, &addr_dst_x); + emit_src_register(emit, &invocation_src); + end_emit_instruction(emit); - /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ - struct tgsi_full_src_register input_control_point; - struct tgsi_full_dst_register output_control_point; + /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ - input_control_point = make_src_reg(TGSI_FILE_INPUT, - emit->tcs.control_point_input_index); - input_control_point.Register.Dimension = 1; - input_control_point.Dimension.Indirect = 1; - input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; - input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index; - output_control_point = - make_dst_output_reg(emit->tcs.control_point_out_index); + struct tgsi_full_src_register input_control_point; + input_control_point = make_src_reg(TGSI_FILE_INPUT, + emit->tcs.control_point_input_index); + input_control_point.Register.Dimension = 1; + input_control_point.Dimension.Indirect = 1; + input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; + input_control_point.DimIndirect.Index = + emit->tcs.control_point_addr_index; - begin_emit_instruction(emit); - emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); - emit_dst_register(emit, &output_control_point); - emit_src_register(emit, &input_control_point); - end_emit_instruction(emit); + begin_emit_instruction(emit); + emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false); + emit_dst_register(emit, &output_control_point); + emit_src_register(emit, &input_control_point); + end_emit_instruction(emit); + } } /** @@ -10296,19 +12063,19 @@ emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit) dst = make_dst_temp_reg(emit->tes.inner.temp_index); switch (emit->tes.prim_mode) { - case PIPE_PRIM_QUADS: + case MESA_PRIM_QUADS: src = make_src_scalar_reg(TGSI_FILE_INPUT, emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X); dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); FALLTHROUGH; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: src = make_src_scalar_reg(TGSI_FILE_INPUT, emit->tes.inner.in_index, TGSI_SWIZZLE_X); dst = writemask_dst(&dst, TGSI_WRITEMASK_X); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); break; - case PIPE_PRIM_LINES: + case MESA_PRIM_LINES: /** * As per SM5 spec, InsideTessFactor for isolines are unused. * In fact glsl tessInnerLevel for isolines doesn't mean anything but if @@ -10328,19 +12095,19 @@ emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit) dst = make_dst_temp_reg(emit->tes.outer.temp_index); switch (emit->tes.prim_mode) { - case PIPE_PRIM_QUADS: + case MESA_PRIM_QUADS: src = make_src_scalar_reg(TGSI_FILE_INPUT, emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X); dst = writemask_dst(&dst, TGSI_WRITEMASK_W); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); FALLTHROUGH; - case PIPE_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLES: src = make_src_scalar_reg(TGSI_FILE_INPUT, emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X); dst = writemask_dst(&dst, TGSI_WRITEMASK_Z); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); FALLTHROUGH; - case PIPE_PRIM_LINES: + case MESA_PRIM_LINES: src = make_src_scalar_reg(TGSI_FILE_INPUT, emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X); dst = writemask_dst(&dst, TGSI_WRITEMASK_Y); @@ -10369,7 +12136,7 @@ emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit) src = make_immediate_reg_float(emit, 0.0f); dst = make_dst_temp_reg(vgpu10_temp_index); emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); - emit->temp_map[emit->initialize_temp_index].initialized = TRUE; + emit->temp_map[emit->initialize_temp_index].initialized = true; emit->initialize_temp_index = INVALID_INDEX; } @@ -10378,7 +12145,7 @@ emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit) * Emit any extra/helper declarations/code that we might need between * the declaration section and code section. */ -static boolean +static bool emit_pre_helpers(struct svga_shader_emitter_v10 *emit) { /* Properties */ @@ -10391,27 +12158,30 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) * do a second pass of the instructions for the patch constant phase. */ emit->tcs.instruction_token_pos = emit->cur_tgsi_token; - emit->tcs.fork_phase_add_signature = FALSE; + emit->tcs.fork_phase_add_signature = false; if (!emit_hull_shader_control_point_phase(emit)) { - emit->skip_instruction = TRUE; - return TRUE; + emit->skip_instruction = true; + return true; } /* Set the current tcs phase to control point phase */ - emit->tcs.control_point_phase = TRUE; + emit->tcs.control_point_phase = true; } else if (emit->unit == PIPE_SHADER_TESS_EVAL) { emit_domain_shader_declarations(emit); } + else if (emit->unit == PIPE_SHADER_COMPUTE) { + emit_compute_shader_declarations(emit); + } /* Declare inputs */ if (!emit_input_declarations(emit)) - return FALSE; + return false; /* Declare outputs */ if (!emit_output_declarations(emit)) - return FALSE; + return false; /* Declare temporary registers */ emit_temporaries_declaration(emit); @@ -10420,6 +12190,9 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) * will already be declared in hs_decls (emit_hull_shader_declarations) */ if (emit->unit != PIPE_SHADER_TESS_CTRL) { + + alloc_common_immediates(emit); + /* Declare constant registers */ emit_constant_declaration(emit); @@ -10427,13 +12200,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) emit_sampler_declarations(emit); emit_resource_declarations(emit); - alloc_common_immediates(emit); - /* Now, emit the constant block containing all the immediates - * declared by shader, as well as the extra ones seen above. - */ + /* Declare images */ + emit_image_declarations(emit); + + /* Declare shader buffers */ + emit_shader_buf_declarations(emit); + + /* Declare atomic buffers */ + emit_atomic_buf_declarations(emit); } - if (emit->unit != PIPE_SHADER_FRAGMENT) { + if (emit->unit != PIPE_SHADER_FRAGMENT && + emit->unit != PIPE_SHADER_COMPUTE) { /* * Declare clip distance output registers for ClipVertex or * user defined planes @@ -10441,6 +12219,18 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) emit_clip_distance_declarations(emit); } + if (emit->unit == PIPE_SHADER_COMPUTE) { + emit_memory_declarations(emit); + + if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) { + emit->cs.grid_size.imm_index = + alloc_immediate_int4(emit, + emit->key.cs.grid_size[0], + emit->key.cs.grid_size[1], + emit->key.cs.grid_size[2], 0); + } + } + if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { float alpha = emit->key.fs.alpha_ref; @@ -10482,7 +12272,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit) if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1) emit_temp_prescale_instructions(emit); - return TRUE; + return true; } @@ -10536,7 +12326,9 @@ static void emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, unsigned fs_color_tmp_index) { - /* compare output color's alpha to alpha ref and kill */ + /* compare output color's alpha to alpha ref and discard if comparison + * fails. + */ unsigned tmp = get_temp_index(emit); struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); struct tgsi_full_src_register tmp_src_x = @@ -10559,7 +12351,7 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit, /* DISCARD if dst.x == 0 */ begin_emit_instruction(emit); - emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */ + emit_discard_opcode(emit, false); /* discard if src0.x is zero */ emit_src_register(emit, &tmp_src_x); end_emit_instruction(emit); @@ -10625,7 +12417,7 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit, * For vertex shaders this means emitting the extra code to apply the * prescale scale/translation. */ -static boolean +static bool emit_post_helpers(struct svga_shader_emitter_v10 *emit) { if (emit->unit == PIPE_SHADER_VERTEX) { @@ -10666,20 +12458,114 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit) emit_vertex_instructions(emit); } - return TRUE; + return true; +} + + +/** + * Reemit rawbuf instruction + */ +static bool +emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit, + unsigned inst_number, + const struct tgsi_full_instruction *inst) +{ + bool ret; + + /* For all the rawbuf references in this instruction, + * load the rawbuf reference and assign to the designated temporary. + * Then reeemit the instruction. + */ + emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS; + + unsigned offset_tmp = get_temp_index(emit); + struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp); + struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp); + struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4); + + for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) { + struct tgsi_full_src_register element_src; + + /* First get the element index register. */ + + if (emit->raw_buf_tmp[i].indirect) { + unsigned tmp = get_temp_index(emit); + struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp); + struct tgsi_full_src_register element_index = + make_src_temp_reg(emit->raw_buf_tmp[i].element_index); + struct tgsi_full_src_register element_rel = + make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel); + + element_src = make_src_temp_reg(tmp); + element_src = scalar_src(&element_src, TGSI_SWIZZLE_X); + element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X); + + /* element index from the indirect register */ + element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index); + element_index = scalar_src(&element_index, TGSI_SWIZZLE_X); + + /* IADD element_src element_index element_index_relative */ + emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst, + &element_index, &element_rel); + } + else { + unsigned element_index = emit->raw_buf_tmp[i].element_index; + union tgsi_immediate_data imm; + imm.Int = element_index; + int immpos = find_immediate(emit, imm, 0); + if (immpos < 0) { + UNUSED unsigned element_index_imm = + add_immediate_int(emit, element_index); + } + element_src = make_immediate_reg_int(emit, element_index); + } + + /* byte offset = element index << 4 */ + emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst, + &element_src, &four); + + struct tgsi_full_dst_register dst_tmp = + make_dst_temp_reg(i + emit->raw_buf_tmp_index); + + /* LD_RAW tmp, rawbuf byte offset, rawbuf */ + + begin_emit_instruction(emit); + emit_opcode(emit, VGPU10_OPCODE_LD_RAW, false); + emit_dst_register(emit, &dst_tmp); + + struct tgsi_full_src_register offset_x = + scalar_src(&offset_src, TGSI_SWIZZLE_X); + emit_src_register(emit, &offset_x); + + emit_resource_register(emit, + emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index); + end_emit_instruction(emit); + } + + emit->raw_buf_cur_tmp_index = 0; + + ret = emit_vgpu10_instruction(emit, inst_number, inst); + + /* reset raw buf state */ + emit->raw_buf_cur_tmp_index = 0; + emit->reemit_rawbuf_instruction = REEMIT_FALSE; + + free_temp_indexes(emit); + + return ret; } /** * Translate the TGSI tokens into VGPU10 tokens. */ -static boolean +static bool emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, const struct tgsi_token *tokens) { struct tgsi_parse_context parse; - boolean ret = TRUE; - boolean pre_helpers_emitted = FALSE; + bool ret = true; + bool pre_helpers_emitted = false; unsigned inst_number = 0; tgsi_parse_init(&parse, tokens); @@ -10709,7 +12595,7 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, ret = emit_pre_helpers(emit); if (!ret) goto done; - pre_helpers_emitted = TRUE; + pre_helpers_emitted = true; } ret = emit_vgpu10_instruction(emit, inst_number++, &parse.FullToken.FullInstruction); @@ -10730,6 +12616,10 @@ emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, ret = emit_vgpu10_instruction(emit, inst_number - 1, &parse.FullToken.FullInstruction); } + else if (emit->reemit_rawbuf_instruction) { + ret = emit_rawbuf_instruction(emit, inst_number - 1, + &parse.FullToken.FullInstruction); + } if (!ret) goto done; @@ -10759,24 +12649,28 @@ done: /** * Emit the first VGPU10 shader tokens. */ -static boolean +static bool emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) { VGPU10ProgramToken ptoken; /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ + + /* Maximum supported shader version is 50 */ + unsigned version = MIN2(emit->version, 50); + ptoken.value = 0; /* init whole token to zero */ - ptoken.majorVersion = emit->version / 10; - ptoken.minorVersion = emit->version % 10; + ptoken.majorVersion = version / 10; + ptoken.minorVersion = version % 10; ptoken.programType = translate_shader_type(emit->unit); if (!emit_dword(emit, ptoken.value)) - return FALSE; + return false; /* Second token: total length of shader, in tokens. We can't fill this * in until we're all done. Emit zero for now. */ if (!emit_dword(emit, 0)) - return FALSE; + return false; if (emit->version >= 50) { VGPU10OpcodeToken0 token; @@ -10798,7 +12692,7 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) token.enableDoublePrecisionFloatOps = 1; /* set bit */ token.instructionLength = 1; if (!emit_dword(emit, token.value)) - return FALSE; + return false; } if (emit->version >= 40) { @@ -10814,14 +12708,14 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) token.opcodeType = VGPU10_OPCODE_NOP; token.instructionLength = 1; if (!emit_dword(emit, token.value)) - return FALSE; + return false; } - return TRUE; + return true; } -static boolean +static bool emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) { VGPU10ProgramToken *tokens; @@ -10840,7 +12734,16 @@ emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit) ptoken->refactoringAllowed = 1; } - return TRUE; + if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) { + /* Replace the reserved token with the forceEarlyDepthStencil global flag */ + VGPU10OpcodeToken0 *ptoken; + + ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token]; + ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS; + ptoken->forceEarlyDepthStencil = 1; + } + + return true; } @@ -10884,6 +12787,9 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, emit->fs.pstipple_sampler_unit = unit; + /* The new sampler state is appended to the end of the samplers list */ + emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++; + /* Setup texture state for stipple */ emit->sampler_target[unit] = TGSI_TEXTURE_2D; emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; @@ -10891,6 +12797,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; emit->key.tex[unit].target = PIPE_TEXTURE_2D; + emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index; if (0) { debug_printf("After pstipple ------------------\n"); @@ -10904,14 +12811,18 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, * Modify the FS to support anti-aliasing point. */ static const struct tgsi_token * -transform_fs_aapoint(const struct tgsi_token *tokens, +transform_fs_aapoint(struct svga_context *svga, + const struct tgsi_token *tokens, int aa_coord_index) { + bool need_texcoord_semantic = + svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD); + if (0) { debug_printf("Before tgsi_add_aa_point ------------------\n"); tgsi_dump(tokens,0); } - tokens = tgsi_add_aa_point(tokens, aa_coord_index); + tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic); if (0) { debug_printf("After tgsi_add_aa_point ------------------\n"); tgsi_dump(tokens, 0); @@ -10947,8 +12858,8 @@ compute_input_mapping(struct svga_context *svga, } if (prevShader != NULL) { - svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage); - emit->prevShaderInfo = &prevShader->info; + svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage); + emit->prevShaderInfo = &prevShader->tgsi_info; } else { /** @@ -11020,6 +12931,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, const struct svga_compile_key *key, enum pipe_shader_type unit) { + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); struct svga_shader_variant *variant = NULL; struct svga_shader_emitter_v10 *emit; const struct tgsi_token *tokens = shader->tokens; @@ -11045,7 +12957,9 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, goto done; emit->unit = unit; - if (svga_have_sm5(svga)) { + if (svga_have_gl43(svga)) { + emit->version = 51; + } else if (svga_have_sm5(svga)) { emit->version = 50; } else if (svga_have_sm4_1(svga)) { emit->version = 41; @@ -11053,6 +12967,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->version = 40; } + emit->use_sampler_state_mapping = emit->key.sampler_state_mapping; + emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0; emit->key = *key; @@ -11098,7 +13014,6 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->tcs.control_point_tmp_index = INVALID_INDEX; emit->tcs.control_point_out_count = 0; emit->tcs.inner.out_index = INVALID_INDEX; - emit->tcs.inner.out_index = INVALID_INDEX; emit->tcs.inner.temp_index = INVALID_INDEX; emit->tcs.inner.tgsi_index = INVALID_INDEX; emit->tcs.outer.out_index = INVALID_INDEX; @@ -11118,6 +13033,14 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->tes.outer.tgsi_index = INVALID_INDEX; emit->tes.prim_id_index = INVALID_INDEX; + emit->cs.thread_id_index = INVALID_INDEX; + emit->cs.block_id_index = INVALID_INDEX; + emit->cs.grid_size.tgsi_index = INVALID_INDEX; + emit->cs.grid_size.imm_index = INVALID_INDEX; + emit->cs.block_width = 1; + emit->cs.block_height = 1; + emit->cs.block_depth = 1; + emit->clip_dist_out_index = INVALID_INDEX; emit->clip_dist_tmp_index = INVALID_INDEX; emit->clip_dist_so_index = INVALID_INDEX; @@ -11127,7 +13050,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->index_range.start_index = INVALID_INDEX; emit->index_range.count = 0; - emit->index_range.required = FALSE; + emit->index_range.required = false; emit->index_range.operandType = VGPU10_NUM_OPERANDS; emit->index_range.dim = 0; emit->index_range.size = 0; @@ -11135,6 +13058,11 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, emit->current_loop_depth = 0; emit->initialize_temp_index = INVALID_INDEX; + emit->image_size_index = INVALID_INDEX; + + emit->max_vs_inputs = svgascreen->max_vs_inputs; + emit->max_vs_outputs = svgascreen->max_vs_outputs; + emit->max_gs_inputs = svgascreen->max_gs_inputs; if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; @@ -11154,7 +13082,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, tokens = new_tokens; } if (key->fs.aa_point) { - tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); + tokens = transform_fs_aapoint(svga, tokens, + key->fs.aa_point_coord_index); } } @@ -11171,7 +13100,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, if (tokens != shader->tokens) { tgsi_scan_shader(tokens, &emit->info); } else { - emit->info = shader->info; + emit->info = shader->tgsi_info; } emit->num_outputs = emit->info.num_outputs; @@ -11202,6 +13131,15 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, } } + /* Determine if constbuf to rawbuf translation is needed */ + emit->raw_buf_srv_start_index = emit->key.srv_raw_constbuf_index; + if (emit->info.const_buffers_declared) + emit->raw_bufs = emit->key.raw_constbufs; + + emit->raw_shaderbuf_srv_start_index = emit->key.srv_raw_shaderbuf_index; + if (emit->info.shader_buffers_declared) + emit->raw_shaderbufs = emit->key.raw_shaderbufs; + /* * Do actual shader translation. */ @@ -11215,6 +13153,10 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, goto cleanup; } + if (emit->num_new_immediates > 0) { + reemit_immediates_block(emit); + } + if (!emit_vgpu10_tail(emit)) { debug_printf("svga: emit VGPU10 tail failed\n"); goto cleanup; @@ -11262,6 +13204,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, struct svga_fs_variant *fs_variant = svga_fs_variant(variant); fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; + fs_variant->pstipple_sampler_state_index = + emit->fs.pstipple_sampler_state_index; /* If there was exactly one write to a fragment shader output register * and it came from a constant buffer, we know all fragments will have @@ -11275,7 +13219,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga, */ fs_variant->uses_flat_interp = emit->uses_flat_interp; - fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units; + fs_variant->fs_shadow_compare_units = emit->shadow_compare_units; } else if (unit == PIPE_SHADER_TESS_EVAL) { struct svga_tes_variant *tes_variant = svga_tes_variant(variant); |