From f379d8f73063a4c4d6cf379318c6b37118d46bfa Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 25 Apr 2011 23:37:47 -0500 Subject: st/mesa: Add a GLSL IR to TGSI translator. It is still a work in progress at this point, but it produces working and reasonably well-optimized code. Originally based on ir_to_mesa and st_mesa_to_tgsi, but does not directly use Mesa IR instructions in TGSI generation, instead generating TGSI from the intermediate class glsl_to_tgsi_instruction. It also has new optimization passes to replace _mesa_optimize_program. --- src/mesa/sources.mak | 3 +- src/mesa/state_tracker/st_cb_program.c | 14 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4431 ++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 66 + src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 +- src/mesa/state_tracker/st_mesa_to_tgsi.h | 6 + src/mesa/state_tracker/st_program.c | 399 +-- src/mesa/state_tracker/st_program.h | 27 + 8 files changed, 4767 insertions(+), 183 deletions(-) create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.cpp create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.h diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 4b2ec08bbb0..ed008f8813e 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \ MESA_GALLIUM_CXX_SOURCES = \ $(MAIN_CXX_SOURCES) \ - $(SHADER_CXX_SOURCES) + $(SHADER_CXX_SOURCES) \ + state_tracker/st_glsl_to_tgsi.cpp # All the core C sources, for dependency checking ALL_SOURCES = \ diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 32694975d17..2abb4d8f082 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -44,6 +44,7 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" +#include "st_glsl_to_tgsi.h" @@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) { struct st_vertex_program *stvp = (struct 
st_vertex_program *) prog; st_release_vp_variants( st, stvp ); + + if (stvp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; case MESA_GEOMETRY_PROGRAM: @@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); + + if (stgp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); if (stgp->tgsi.tokens) { st_free_tokens((void *) stgp->tgsi.tokens); @@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_fp_variants(st, stfp); + if (stfp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + if (stfp->tgsi.tokens) { st_free_tokens(stfp->tgsi.tokens); stfp->tgsi.tokens = NULL; @@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; + + functions->NewShader = st_new_shader; + functions->NewShaderProgram = st_new_shader_program; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp new file mode 100644 index 00000000000..e1102503ee0 --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -0,0 +1,4431 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 
+ * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file glsl_to_tgsi.cpp + * + * Translate GLSL IR to Mesa's gl_program representation and to TGSI. 
+ */ + +#include +#include "main/compiler.h" +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/hash_table.h" +#include "program/prog_instruction.h" +#include "program/prog_optimize.h" +#include "program/prog_print.h" +#include "program/program.h" +#include "program/prog_uniform.h" +#include "program/prog_parameter.h" +#include "program/sampler.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_dump.h" +#include "st_context.h" +#include "st_program.h" +#include "st_glsl_to_tgsi.h" +#include "st_mesa_to_tgsi.h" + +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) +} + +class st_src_reg; +class st_dst_reg; + +static int swizzle_for_size(int size); + +/** + * This struct is a corresponding struct to Mesa prog_src_register, with + * wider fields. 
+ */
+class st_src_reg {
+public:
+   /**
+    * Build a source register for \p file / \p index.
+    *
+    * For scalar, vector and matrix types the swizzle comes from
+    * swizzle_for_size(type->vector_elements); for any other type, or when
+    * \p type is NULL, the swizzle is SWIZZLE_XYZW.  The register starts with
+    * no negation and no relative addressing.
+    */
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   {
+      this->file = file;
+      this->index = index;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   /** Build an undefined register (PROGRAM_UNDEFINED, all fields zero/NULL). */
+   st_src_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->swizzle = 0;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   /** Convert a destination register into a source reading the same slot. */
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+/**
+ * Destination-register counterpart of st_src_reg: a register file and index
+ * plus a write mask and condition mask instead of a swizzle and negate mask.
+ */
+class st_dst_reg {
+public:
+   /**
+    * Build a destination in \p file with the given write mask.  The index is
+    * always initialised to 0, the condition mask to COND_TR, and there is no
+    * relative addressing.
+    */
+   st_dst_reg(gl_register_file file, int writemask)
+   {
+      this->file = file;
+      this->index = 0;
+      this->writemask = writemask;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   /** Build an undefined destination (PROGRAM_UNDEFINED, empty write mask). */
+   st_dst_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->writemask = 0;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   /** Convert a source register into a destination writing the same slot. */
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4; /**< initialised to COND_TR by every constructor */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+/**
+ * Source-from-destination conversion: copies file and index, and resets the
+ * swizzle to SWIZZLE_XYZW and the negate mask to 0.
+ *
+ * NOTE(review): reladdr is set to NULL here, whereas the reverse conversion
+ * st_dst_reg(st_src_reg) below copies reg.reladdr.  Confirm that dropping the
+ * destination's relative address is intended.
+ */
+st_src_reg::st_src_reg(st_dst_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->swizzle = SWIZZLE_XYZW;
+   this->negate = 0;
+   this->reladdr = NULL;
+}
+
+/**
+ * Destination-from-source conversion: copies file, index and reladdr, and
+ * enables all four channels (WRITEMASK_XYZW) with condition mask COND_TR.
+ * The source's swizzle and negate mask are not carried over.
+ */
+st_dst_reg::st_dst_reg(st_src_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->writemask = WRITEMASK_XYZW;
+   this->cond_mask = COND_TR;
+   this->reladdr = reg.reladdr;
+}
+
+/**
+ * One instruction of the intermediate form: a Mesa opcode with one
+ * destination and up to three sources, kept on an exec_list.
+ */
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete.  It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      /* rzalloc_size() is the zeroing ralloc allocator; the node becomes a
+       * child of ctx.
+       */
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   enum prog_opcode op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+
+   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
+};
+
+/**
+ * Records the register file and index assigned to an ir_variable; entries
+ * are kept on glsl_to_tgsi_visitor::variables.
+ */
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+      : file(file), index(index), var(var)
+   {
+      /* empty */
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+/** Bookkeeping for one function signature that the program may call. */
+class function_entry : public exec_node {
+public:
+   ir_function_signature *sig;
+
+   /**
+    * identifier of this function signature used by the program.
+    *
+    * At the point that Mesa instructions for function calls are
+    * generated, we don't know the address of the first instruction of
+    * the function body.  So we make the BranchTarget that is called a
+    * small integer and rewrite them during set_branchtargets().
+    */
+   int sig_id;
+
+   /**
+    * Pointer to first instruction of the function body.
+    *
+    * Set during function body emits after main() is processed.
+    */
+   glsl_to_tgsi_instruction *bgn_inst;
+
+   /**
+    * Index of the first instruction of the function body in actual
+    * Mesa IR.
+    *
+    * Set after conversion from glsl_to_tgsi_instruction to prog_instruction.
+    */
+   int inst;
+
+   /** Storage for the return value. */
+   st_src_reg return_reg;
+};
+
+/**
+ * IR visitor that walks GLSL IR and appends glsl_to_tgsi_instruction nodes
+ * to \c instructions.  Each rvalue visit leaves the register holding its
+ * value in \c result.
+ */
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+   glsl_to_tgsi_visitor();
+   ~glsl_to_tgsi_visitor();
+
+   function_entry *current_function;
+
+   struct gl_context *ctx;
+   struct gl_program *prog;
+   struct gl_shader_program *shader_program;
+   struct gl_shader_compiler_options *options;
+
+   /** Index of the next unused PROGRAM_TEMPORARY; advanced by get_temp(). */
+   int next_temp;
+
+   /** Set to 1 by emit() once an OPCODE_ARL has been emitted. */
+   int num_address_regs;
+   /** Set by emit() when a relatively addressed temporary is seen. */
+   bool indirect_addr_temps;
+   /** Set by emit() when a relatively addressed constant/uniform is seen. */
+   bool indirect_addr_consts;
+
+   /** Look up the storage recorded for \p var; NULL if there is none. */
+   variable_storage *find_variable_storage(ir_variable *var);
+
+   function_entry *get_function_signature(ir_function_signature *sig);
+
+   /** Reserve temporaries large enough for \p type and return the first. */
+   st_src_reg get_temp(const glsl_type *type);
+   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+   /** Return a PROGRAM_CONSTANT source holding \p val. */
+   st_src_reg st_src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   /** Register holding the value of the most recently visited rvalue. */
+   st_src_reg result;
+
+   /** List of variable_storage */
+   exec_list variables;
+
+   /** List of function_entry */
+   exec_list function_signatures;
+   int next_signature_id;
+
+   /** List of glsl_to_tgsi_instruction */
+   exec_list instructions;
+
+   /* emit() overloads for zero to three source operands.  The shorter forms
+    * forward to the three-source form, padding with undefined registers.
+    */
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+                                  st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+                                  st_dst_reg dst,
+                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(ir_instruction *ir,
+                st_dst_reg dst,
+                st_src_reg src0,
+                st_src_reg src1,
+                unsigned elements);
+
+   /* Emit a scalar-only opcode once per distinct source channel so that each
+    * enabled destination channel receives its own result.
+    */
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+                    st_dst_reg dst, st_src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   /* Emit sine or cosine through OPCODE_SCS; \p op selects which one. */
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+                 st_dst_reg dst, const st_src_reg &src);
+
+   /* Peephole helper used by visit(ir_expression): returns true when it has
+    * emitted the whole expression itself.
+    */
+   GLboolean try_emit_mad(ir_expression *ir,
+                          int mul_operand);
+   GLboolean
try_emit_sat(ir_expression *ir);
+
+   void emit_swz(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void rename_temp_register(int index, int new_index);
+   int get_first_temp_read(int index);
+   int get_first_temp_write(int index);
+   int get_last_temp_read(int index);
+   int get_last_temp_write(int index);
+
+   void copy_propagate(void);
+   void eliminate_dead_code(void);
+   void merge_registers(void);
+   void renumber_registers(void);
+
+   void *mem_ctx;
+};
+
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
+
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+/**
+ * Append a printf-style message to the program's info log and mark the
+ * link as failed.
+ *
+ * \param prog  Shader program whose InfoLog and LinkStatus are updated
+ * \param fmt   printf-style format string, followed by its arguments
+ */
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
+   va_end(args);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+/**
+ * Return the swizzle that reads the first \p size channels of a register
+ * and replicates the last valid channel into the unused trailing ones.
+ *
+ * \param size  Number of channels in use; must be in the range 1..4
+ */
+static int
+swizzle_for_size(int size)
+{
+   /* Constant table: build it once rather than on every call. */
+   static const int size_swizzles[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+/**
+ * Append one instruction with up to three source operands.
+ *
+ * Relative addressing is resolved first: every relatively addressed source
+ * except the last one outstanding is copied into a fresh temporary (see
+ * reladdr_to_temp()), and the address register is loaded for the operand
+ * that keeps its relative address.  The visitor's num_address_regs,
+ * indirect_addr_temps and indirect_addr_consts members are updated as a
+ * side effect.
+ *
+ * \param ir    IR instruction this code is generated for (may be NULL)
+ * \param op    Mesa opcode to emit
+ * \param dst   Destination register
+ * \param src0  First source register
+ * \param src1  Second source register
+ * \param src2  Third source register
+ * \return the new instruction, already linked at the tail of \c instructions
+ */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+                           st_dst_reg dst,
+                           st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+   int num_reladdr = 0, i;
+
+   /* If we have to do relative addressing, we want to load the ARL
+    * reg directly for one of the regs, and preload the other reladdr
+    * sources into temps.
+    */
+   num_reladdr += dst.reladdr != NULL;
+   num_reladdr += src0.reladdr != NULL;
+   num_reladdr += src1.reladdr != NULL;
+   num_reladdr += src2.reladdr != NULL;
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+
+   inst->function = NULL;
+
+   if (op == OPCODE_ARL)
+      this->num_address_regs = 1;
+
+   /* Update indirect addressing status used by TGSI */
+   if (dst.reladdr) {
+      switch(dst.file) {
+      case PROGRAM_TEMPORARY:
+         this->indirect_addr_temps = true;
+         break;
+      case PROGRAM_LOCAL_PARAM:
+      case PROGRAM_ENV_PARAM:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         this->indirect_addr_consts = true;
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      for (i=0; i<3; i++) {
+         if(inst->src[i].reladdr) {
+            /* Classify by the file of the relatively addressed *source*.
+             * Testing dst.file here would record an indirect constant read
+             * into a temporary as indirect temporary access.
+             */
+            switch(inst->src[i].file) {
+            case PROGRAM_TEMPORARY:
+               this->indirect_addr_temps = true;
+               break;
+            case PROGRAM_LOCAL_PARAM:
+            case PROGRAM_ENV_PARAM:
+            case PROGRAM_STATE_VAR:
+            case PROGRAM_NAMED_PARAM:
+            case PROGRAM_CONSTANT:
+            case PROGRAM_UNIFORM:
+               this->indirect_addr_consts = true;
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   this->instructions.push_tail(inst);
+
+   return inst;
+}
+
+
+/** Two-source form of emit(); the third source is left undefined. */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   return emit(ir, op, dst, src0, src1, undef_src);
+}
+
+/**
+ * One-source form of emit(); the other sources are left undefined.  The
+ * destination must enable at least one channel.
+ */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+                           st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(ir, op, dst, src0, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum
prog_opcode op)
+{
+   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+}
+
+/**
+ * Emit the dot-product opcode matching the operand width.
+ *
+ * \param ir        IR instruction being processed
+ * \param dst       Destination register
+ * \param src0      First operand
+ * \param src1      Second operand
+ * \param elements  Number of vector components; must be 2, 3 or 4
+ */
+void
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+                              unsigned elements)
+{
+   static const gl_inst_opcode dot_opcodes[] = {
+      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+   };
+
+   /* Any other width would index outside dot_opcodes[]. */
+   assert(elements >= 2 && elements <= 4);
+
+   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits Mesa scalar opcodes to produce unique answers across channels.
+ *
+ * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * channel determines the result across all channels.  So to do a vec4
+ * of this operation, we want to emit a scalar per source channel used
+ * to produce dest channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+                                  st_dst_reg dst,
+                                  st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   int i, j;
+   /* Channels that need no (further) instruction: start with every channel
+    * the destination does not write.
+    */
+   int done_mask = ~dst.writemask;
+
+   /* Mesa RCP is a scalar operation splatting results to all channels,
+    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
+    * dst channels.
+    */
+   for (i = 0; i < 4; i++) {
+      GLuint this_mask = (1 << i);
+      glsl_to_tgsi_instruction *inst;
+      st_src_reg src0 = orig_src0;
+      st_src_reg src1 = orig_src1;
+
+      if (done_mask & this_mask)
+         continue;
+
+      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
+      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz &&
+             GET_SWZ(src1.swizzle, j) == src1_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+                                   src0_swiz, src0_swiz);
+      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
+                                   src1_swiz, src1_swiz);
+
+      inst = emit(ir, op, dst, src0, src1);
+      inst->dst.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
+/**
+ * One-source form of emit_scalar().  The second operand is an undefined
+ * register with an XXXX swizzle, so it never splits destination channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+                                  st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg undef = undef_src;
+
+   undef.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, undef);
+}
+
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes.  Instead of splatting its result across all
+ * four components of the destination, it writes the cosine to the \c x
+ * component and the sine to the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ *                  value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+                               st_dst_reg dst,
+                               const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+   /* SCS produces cos(src.x) in X and sin(src.x) in Y, so the cosine lives
+    * in component 0 and the sine in component 1.
+    */
+   const unsigned component = (op == OPCODE_COS) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   /* If there are components in the destination that differ from the
+    * component that will be written by the SCS instruction, we'll need a
+    * temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specifies which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzled to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+                                   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.  Compare against the caller's swizzle: src0.swizzle has
+          * already been splatted above and would match every channel.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.  This has to be a plain copy: a second SCS would
+          * take the sine/cosine of the value just computed.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+                                     component, component);
+         inst = emit(ir, OPCODE_MOV, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+/**
+ * Return a PROGRAM_CONSTANT source register holding \p val.
+ *
+ * The value is added to the program's parameter list with
+ * _mesa_add_unnamed_constant(), which supplies both the register index and
+ * the swizzle stored in the returned register.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+
+   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+                                          &val, 1, &src.swizzle);
+
+   return src;
+}
+
+/**
+ * Number of vec4 register slots a value of \p type occupies.
+ *
+ * Scalars and vectors take one slot, matrices one slot per column, arrays
+ * and structures the sum of their elements, and samplers one slot.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+         return type->matrix_columns;
+      } else {
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+         size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).  Actual register allocation for the Mesa VM occurs in a
+ * pass over the Mesa IR later.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+   int swizzle[4];
+   int i;
+
+   /* Hand out the next free temporary and reserve type_size(type)
+    * consecutive indices for it.
+    */
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      /* Identity swizzle for the channels the type has; the remaining
+       * channels repeat the last valid one.
+       */
+      for (i = 0; i < type->vector_elements; i++)
+         swizzle[i] = i;
+      for (; i < 4; i++)
+         swizzle[i] = type->vector_elements - 1;
+      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
+                                  swizzle[2], swizzle[3]);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+/**
+ * Search \c variables for the storage entry recorded for \p var.
+ *
+ * \return the matching entry, or NULL when the variable has none
+ */
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+
+   variable_storage *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->variables) {
+      entry = (variable_storage *)iter.get();
+
+      if (entry->var == var)
+         return entry;
+   }
+
+   return NULL;
+}
+
+/**
+ * Handle a variable declaration.
+ *
+ * gl_FragCoord and gl_FragDepth copy their layout qualifiers into the
+ * program, which is cast to gl_fragment_program for that purpose.  Uniforms
+ * whose names start with "gl_" get storage set up from their state slots:
+ * either a direct reference to the state parameters, or a temporary that
+ * the state values are copied into with MOVs.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+   if (strcmp(ir->name, "gl_FragCoord") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+      fp->OriginUpperLeft = ir->origin_upper_left;
+      fp->PixelCenterInteger = ir->pixel_center_integer;
+
+   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+      switch (ir->depth_layout) {
+      case ir_depth_layout_none:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+         break;
+      case ir_depth_layout_any:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+         break;
+      case ir_depth_layout_greater:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+         break;
+      case ir_depth_layout_less:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+         break;
+      case ir_depth_layout_unchanged:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+      unsigned int i;
+      const ir_state_slot *const slots = ir->state_slots;
+      assert(ir->state_slots != NULL);
+
+      /* Check if this statevar's setup in the STATE file exactly
+       * matches how we'll want to reference it as a
+       * struct/array/whatever.  If not, then we need to move it into
+       * temporary storage and hope that it'll get copy-propagated
+       * out.
+       */
+      for (i = 0; i < ir->num_state_slots; i++) {
+         if (slots[i].swizzle != SWIZZLE_XYZW) {
+            break;
+         }
+      }
+
+      struct variable_storage *storage;
+      st_dst_reg dst;
+      if (i == ir->num_state_slots) {
+         /* We'll set the index later. */
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+         this->variables.push_tail(storage);
+
+         dst = undef_dst;
+      } else {
+         /* The variable_storage constructor allocates slots based on the size
+          * of the type.  However, this had better match the number of state
+          * elements that we're going to copy into the new temporary.
+          */
+         assert((int) ir->num_state_slots == type_size(ir->type));
+
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+                                                 this->next_temp);
+         this->variables.push_tail(storage);
+         this->next_temp += type_size(ir->type);
+
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
+      }
+
+
+      /* Register every state slot with the parameter list.  Direct state
+       * storage takes its index from the first slot and asserts that later
+       * slots follow consecutively; temporary storage gets one MOV per slot.
+       */
+      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+         int index = _mesa_add_state_reference(this->prog->Parameters,
+                                               (gl_state_index *)slots[i].tokens);
+
+         if (storage->file == PROGRAM_STATE_VAR) {
+            if (storage->index == -1) {
+               storage->index = index;
+            } else {
+               assert(index == storage->index + (int)i);
+            }
+         } else {
+            st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
+            src.swizzle = slots[i].swizzle;
+            emit(ir, OPCODE_MOV, dst, src);
+            /* even a float takes up a whole vec4 reg in a struct/array. */
+            dst.index++;
+         }
+      }
+
+      /* The copy loop must have advanced dst by exactly one register per
+       * state slot; anything else is reported as a link failure.
+       */
+      if (storage->file == PROGRAM_TEMPORARY &&
+          dst.index != storage->index + (int) ir->num_state_slots) {
+         fail_link(this->shader_program,
+                   "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
+                   ir->name, dst.index - storage->index,
+                   type_size(ir->type));
+      }
+   }
+}
+
+/**
+ * Emit a loop as BGNLOOP ... ENDLOOP.
+ *
+ * The counter initialisation, the exit test and the increment are not part
+ * of the loop body in the IR, so short-lived IR nodes are built for them,
+ * visited, and deleted again: the initialisation before BGNLOOP, the exit
+ * test ("if (counter cmp to) break") at the top of the loop, and the
+ * increment after the body.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL;
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a;
+   }
+
+   emit(NULL, OPCODE_BGNLOOP);
+
+   if (ir->to) {
+      ir_expression *e =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+                               counter, ir->to);
+      ir_if *if_stmt = new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt;
+      delete e;
+      delete brk;
+   }
+
+   visit_exec_list(&ir->body_instructions, this);
+
+   if (ir->increment) {
+      ir_expression *e =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+                               counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(NULL, OPCODE_ENDLOOP);
+}
+
+/** Emit BRK for a break and CONT for a continue. */
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      emit(NULL, OPCODE_BRK);
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(NULL, OPCODE_CONT);
+      break;
+   }
+}
+
+
+/**
+ * Never expected to be called: visit(ir_function) walks the body of the
+ * signature it selects directly instead of visiting the signature node.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   assert(0);
+   (void)ir;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined before we get to glsl_to_tgsi.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty;
+
+      /* main() takes no parameters, so match against an empty list. */
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      foreach_iter(exec_list_iterator, iter, sig->body) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+
+         ir->accept(this);
+      }
+   }
+}
+
+/**
+ * Try to emit an addition whose operand \p mul_operand is a multiply as a
+ * single OPCODE_MAD.
+ *
+ * \param ir           The ir_binop_add expression
+ * \param mul_operand  Index (0 or 1) of the operand to test for a multiply
+ * \return true if a MAD was emitted and \c result was set; false if the
+ *         operand is not an ir_binop_mul expression, in which case nothing
+ *         has been emitted
+ */
+GLboolean
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   int nonmul_operand = 1 - mul_operand;
+   st_src_reg a, b, c;
+
+   ir_expression *expr = ir->operands[mul_operand]->as_expression();
+   if (!expr || expr->operation != ir_binop_mul)
+      return false;
+
+   expr->operands[0]->accept(this);
+   a = this->result;
+   expr->operands[1]->accept(this);
+   b = this->result;
+   ir->operands[nonmul_operand]->accept(this);
+   c = this->result;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+
+   return true;
+}
+
+/**
+ * Try to emit \p ir as a saturating MOV of its inner value.
+ *
+ * \return true if the expression was emitted this way and \c result was
+ *         set; false for vertex programs or when as_rvalue_to_saturate()
+ *         does not recognise the expression
+ */
+GLboolean
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   st_src_reg src = this->result;
+
+   this->result = get_temp(ir->type);
+   glsl_to_tgsi_instruction *inst;
+   inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src);
+   inst->saturate = true;
+
+   return true;
+}
+
+/**
+ * Resolve the relative addressing of one operand on behalf of emit().
+ *
+ * Does nothing when \p reg has no reladdr.  Otherwise the address register
+ * is loaded from it, and unless this is the last outstanding relatively
+ * addressed operand (\p *num_reladdr == 1) the value is copied into a new
+ * temporary that replaces \p *reg.  \p *num_reladdr is decremented.
+ */
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+                                      st_src_reg *reg, int *num_reladdr)
+{
+   if (!reg->reladdr)
+      return;
+
+   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
+
+   if (*num_reladdr != 1) {
+      st_src_reg temp = get_temp(glsl_type::vec4_type);
+
+      emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg);
+      *reg = temp;
+   }
+
+   (*num_reladdr)--;
+}
+
+/**
+ * Emit an ir_quadop_vector expression as a single OPCODE_SWZ.
+ *
+ * Each operand is reduced to a swizzle selector and a negate flag, all
+ * relative to one variable.  The result is written to a new temporary,
+ * which is left in \c result.
+ */
+void
+glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      /* Peel wrappers (negation, swizzle) off the operand until a constant
+       * or a variable dereference ends the chain.
+       */
+      while (op != NULL) {
+         switch (op->ir_type) {
+         case ir_type_constant: {
+
+            assert(op->type->is_scalar());
+
+            const ir_constant *const c = op->as_constant();
+            if (c->is_one()) {
+               components[i] = SWIZZLE_ONE;
+            } else if (c->is_zero()) {
+               components[i] = SWIZZLE_ZERO;
+            } else if (c->is_negative_one()) {
+               components[i] = SWIZZLE_ONE;
+               negate[i] = true;
+            } else {
+               assert(!"SWZ constant must be 0.0 or 1.0.");
+            }
+
+            op = NULL;
+            break;
+         }
+
+         case ir_type_dereference_variable: {
+            ir_dereference_variable *const deref =
+               (ir_dereference_variable *) op;
+
+            /* Every operand must read the same variable.
+             *
+             * NOTE(review): when this case is reached through an
+             * ir_type_swizzle wrapper, the component taken from the swizzle
+             * mask is overwritten with SWIZZLE_X here -- confirm this is
+             * intended.
+             */
+            assert((var == NULL) || (deref->var == var));
+            components[i] = SWIZZLE_X;
+            var = deref->var;
+            op = NULL;
+            break;
+         }
+
+         case ir_type_expression: {
+            ir_expression *const expr = (ir_expression *) op;
+
+            assert(expr->operation == ir_unop_neg);
+            negate[i] = true;
+
+            op = expr->operands[0];
+            break;
+         }
+
+         case ir_type_swizzle: {
+            ir_swizzle *const swiz = (ir_swizzle *) op;
+
+            components[i] = swiz->mask.x;
+            op = swiz->val;
+            break;
+         }
+
+         default:
+            assert(!"Should not get here.");
+            return;
+         }
+      }
+   }
+
+   assert(var != NULL);
+
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   /* Visit a dereference of the variable to obtain its register; a result
+    * still marked undefined afterwards is fatal.
+    */
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   st_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+                               components[1],
+                               components[2],
+                               components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+                 | (unsigned(negate[1]) << 1)
+                 | (unsigned(negate[2]) << 2)
+                 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const st_src_reg result_src = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   emit(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)];
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector) {
+      this->emit_swz(ir);
+      return;
+   }
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = PROGRAM_UNDEFINED;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+                             ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.
Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = st_dst_reg(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_neg: + op[0].negate = ~op[0].negate; + result_src = op[0]; + break; + case ir_unop_abs: + emit(ir, OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + emit(ir, OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_log2: + emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + break; + case ir_unop_sin_reduced: + emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + emit(ir, OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + emit(ir, OPCODE_DDY, result_dst, op[0]); + break; + + case ir_unop_noise: { + const enum prog_opcode opcode = + prog_opcode(OPCODE_NOISE1 + + (ir->operands[0]->type->vector_elements) - 1); + assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); + + emit(ir, opcode, result_dst, op[0]); + break; + } + + case ir_binop_add: + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + break; + case 
ir_binop_sub: + emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_nequal: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. 
*/ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + } + break; + + case ir_unop_any: + assert(ir->operands[0]->type->is_vector()); + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_xor: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + /* This could be a saturated add and skip the SNE. */ + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(ir, result_dst, op[0], op[1], + ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + break; + case ir_unop_rsq: + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + /* Mesa IR lacks types, ints are stored as truncated floats. 
*/ + result_src = op[0]; + break; + case ir_unop_f2i: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + emit(ir, OPCODE_SNE, result_dst, + op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_trunc: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + emit(ir, OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + emit(ir, OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; + } + + this->result = result_src; +} + + +void +glsl_to_tgsi_visitor::visit(ir_swizzle *ir) +{ + st_src_reg src; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. 
+ */ + + ir->val->accept(this); + src = this->result; + assert(src.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) +{ + variable_storage *entry = find_variable_storage(ir->var); + ir_variable *var = ir->var; + + if (!entry) { + switch (var->mode) { + case ir_var_uniform: + entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, + var->location); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! 
+ */ + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->location); + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(this->prog->Attributes, + var->name, + _mesa_sizeof_glsl_type(var->type->gl_type), + var->type->gl_type, + var->location - VERT_ATTRIB_GENERIC0); + } + break; + case ir_var_out: + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->location); + break; + case ir_var_system_value: + entry = new(mem_ctx) variable_storage(var, + PROGRAM_SYSTEM_VALUE, + var->location); + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", var->name); + exit(1); + } + } + + this->result = st_src_reg(entry->file, entry->index, var->type); +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + st_src_reg src; + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (index) { + src.index += index->value.i[0] * element_size; + } else { + st_src_reg array_base = this->result; + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. 
+ */ + ir->array_index->accept(this); + + st_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + this->result, st_src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = SWIZZLE_NOOP; + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = SWIZZLE_NOOP; + + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static st_dst_reg +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). 
+ */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return st_dst_reg(v->result); +} + +/** + * Process the condition of a conditional assignment + * + * Examines the condition of a conditional assignment to generate the optimal + * first operand of a \c CMP instruction. If the condition is a relational + * operator with 0 (e.g., \c ir_binop_less), the value being compared will be + * used as the source for the \c CMP instruction. Otherwise the comparison + * is processed to a boolean result, and the boolean result is used as the + * operand to the CMP instruction. + */ +bool +glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) +{ + ir_rvalue *src_ir = ir; + bool negate = true; + bool switch_order = false; + + ir_expression *const expr = ir->as_expression(); + if ((expr != NULL) && (expr->get_num_operands() == 2)) { + bool zero_on_left = false; + + if (expr->operands[0]->is_zero()) { + src_ir = expr->operands[1]; + zero_on_left = true; + } else if (expr->operands[1]->is_zero()) { + src_ir = expr->operands[0]; + zero_on_left = false; + } + + /* a is - 0 + - 0 + + * (a < 0) T F F ( a < 0) T F F + * (0 < a) F F T (-a < 0) F F T + * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) + * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) + * (a > 0) F F T (-a < 0) F F T + * (0 > a) T F F ( a < 0) T F F + * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) + * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) + * + * Note that exchanging the order of 0 and 'a' in the comparison simply + * means that the value of 'a' should be negated. 
+ */ + if (src_ir != ir) { + switch (expr->operation) { + case ir_binop_less: + switch_order = false; + negate = zero_on_left; + break; + + case ir_binop_greater: + switch_order = false; + negate = !zero_on_left; + break; + + case ir_binop_lequal: + switch_order = true; + negate = !zero_on_left; + break; + + case ir_binop_gequal: + switch_order = true; + negate = zero_on_left; + break; + + default: + /* This isn't the right kind of comparison afterall, so make sure + * the whole condition is visited. + */ + src_ir = ir; + break; + } + } + } + + src_ir->accept(this); + + /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + * condition we produced is 0.0 or 1.0. By flipping the sign, we can + * choose which value OPCODE_CMP produces without an extra instruction + * computing the condition. + */ + if (negate) + this->result.negate = ~this->result.negate; + + return switch_order; +} + +void +glsl_to_tgsi_visitor::visit(ir_assignment *ir) +{ + st_dst_reg l; + st_src_reg r; + int i; + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + l.writemask = WRITEMASK_XYZW; + } else if (ir->lhs->type->is_scalar()) { + /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the + * FINISHME: W component of fragment shader output zero, work correctly. 
+ */ + l.writemask = WRITEMASK_XYZW; + } else { + int swizzles[4]; + int first_enabled_chan = 0; + int rhs_chan = 0; + + assert(ir->lhs->type->is_vector()); + l.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) { + first_enabled_chan = GET_SWZ(r.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while Mesa IR treats write_mask as just + * showing which channels of the vec4 RHS get written. + */ + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) + swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); + else + swizzles[i] = first_enabled_chan; + } + r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + } + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + const bool switch_order = this->process_move_condition(ir->condition); + st_src_reg condition = this->result; + + for (i = 0; i < type_size(ir->lhs->type); i++) { + if (switch_order) { + emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + } else { + emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + } + + l.index++; + r.index++; + } + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_constant *ir) +{ + st_src_reg src; + GLfloat stack_vals[4] = { 0 }; + GLfloat *values = stack_vals; + unsigned int i; + + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_unnamed_constant. So, make a temp to store an + * aggregate constant and move each constant value into it. If we + * get lucky, copy propagation will eliminate the extra moves. 
+ */ + + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src = this->result; + + for (i = 0; i < (unsigned int)size; i++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src = this->result; + for (int j = 0; j < size; j++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + st_src_reg mat = get_temp(ir->type); + st_dst_reg mat_column = st_dst_reg(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = &ir->value.f[i * ir->type->vector_elements]; + + src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &src.swizzle); + emit(ir, OPCODE_MOV, mat_column, src); + + mat_column.index++; + } + + this->result = mat; + return; + } + + src.file = PROGRAM_CONSTANT; + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + values = &ir->value.f[0]; + break; + case GLSL_TYPE_UINT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = 
ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); + this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &this->result.swizzle); +} + +function_entry * +glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = ralloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. */ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + } + + if (!sig->return_type->is_void()) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = undef_src; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +glsl_to_tgsi_visitor::visit(ir_call *ir) +{ + glsl_to_tgsi_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. 
*/ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + st_src_reg r = this->result; + + st_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = emit(ir, OPCODE_CAL); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + st_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + st_dst_reg l = st_dst_reg(this->result); + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + +void +glsl_to_tgsi_visitor::visit(ir_texture *ir) +{ + st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_dst_reg result_dst, coord_dst; + glsl_to_tgsi_instruction *inst = NULL; + prog_opcode opcode = OPCODE_NOP; + + ir->coordinate->accept(this); + + /* Put our coords in a temp. 
We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. Mesa IR optimization should + * handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, OPCODE_MOV, coord_dst, this->result); + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = st_dst_reg(result_src); + + switch (ir->op) { + case ir_tex: + opcode = OPCODE_TEX; + break; + case ir_txb: + opcode = OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + opcode = OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; + case ir_txf: // TODO: use TGSI_OPCODE_TXF here + assert(!"GLSL 1.30 features unsupported"); + break; + } + + if (ir->projector) { + if (opcode == OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = OPCODE_TXP; + } else { + st_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by lod info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_RCP, coord_dst, projector); + + /* In the case where we have to project the coordinates "by hand," + * the shadow comparitor value must also be projected. 
+ */ + st_src_reg tmp_src = coord; + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + + tmp_src = get_temp(glsl_type::vec4_type); + st_dst_reg tmp_dst = st_dst_reg(tmp_src); + + tmp_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, tmp_dst, this->result); + + tmp_dst.writemask = WRITEMASK_XY; + emit(ir, OPCODE_MOV, tmp_dst, coord); + } + + coord_dst.writemask = WRITEMASK_XYZ; + emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + /* If projection is done and the opcode is not OPCODE_TXP, then the shadow + * comparitor was put in the correct place (and projected) by the code, + * above, that handles by-hand projection. + */ + if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { + /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else + inst = emit(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + + const glsl_type *sampler_type = ir->sampler->type; + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (sampler_type->sampler_array) + ? 
TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + assert(!"FINISHME: Implement ARB_texture_buffer_object"); + break; + default: + assert(!"Should not get here."); + } + + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_return *ir) +{ + if (ir->get_value()) { + st_dst_reg l; + int i; + + assert(current_function); + + ir->get_value()->accept(this); + st_src_reg r = this->result; + + l = st_dst_reg(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + emit(ir, OPCODE_RET); +} + +void +glsl_to_tgsi_visitor::visit(ir_discard *ir) +{ + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, OPCODE_KIL, undef_dst, this->result); + } else { + emit(ir, OPCODE_KIL_NV); + } + + fp->UsesKill = GL_TRUE; +} + +void +glsl_to_tgsi_visitor::visit(ir_if *ir) +{ + glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *prev_inst; + + prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (this->options->EmitCondCodes) { + cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. 
+ */ + if (cond_inst == prev_inst) { + st_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = emit(ir->condition, OPCODE_IF); + if_inst->dst.cond_mask = COND_NE; + } else { + if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + else_inst = emit(ir->condition, OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = emit(ir->condition, OPCODE_ENDIF); +} + +glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + current_function = NULL; + num_address_regs = 0; + indirect_addr_temps = false; + indirect_addr_consts = false; + mem_ctx = ralloc_context(NULL); +} + +glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() +{ + ralloc_free(mem_ctx); +} + +extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) +{ + delete v; +} + +static struct prog_src_register +mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) +{ + struct prog_src_register mesa_reg; + + mesa_reg.File = reg.file; + assert(reg.index < (1 << INST_INDEX_BITS)); + mesa_reg.Index = reg.index; + mesa_reg.Swizzle = reg.swizzle; + mesa_reg.RelAddr = reg.reladdr != NULL; + mesa_reg.Negate = reg.negate; + mesa_reg.Abs = 0; + mesa_reg.HasIndex2 = GL_FALSE; + mesa_reg.RelAddr2 = 0; + mesa_reg.Index2 = 0; + + return mesa_reg; +} + +static void +set_branchtargets(glsl_to_tgsi_visitor *v, + struct prog_instruction *mesa_instructions, + int num_instructions) +{ + int if_count = 0, loop_count = 0; + int *if_stack, *loop_stack; + int if_stack_pos = 0, loop_stack_pos = 0; + int i, j; + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_count++; + break; + case OPCODE_BGNLOOP: + loop_count++; + 
break; + case OPCODE_BRK: + case OPCODE_CONT: + mesa_instructions[i].BranchTarget = -1; + break; + default: + break; + } + } + + if_stack = rzalloc_array(v->mem_ctx, int, if_count); + loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_stack[if_stack_pos] = i; + if_stack_pos++; + break; + case OPCODE_ELSE: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack[if_stack_pos - 1] = i; + break; + case OPCODE_ENDIF: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack_pos--; + break; + case OPCODE_BGNLOOP: + loop_stack[loop_stack_pos] = i; + loop_stack_pos++; + break; + case OPCODE_ENDLOOP: + loop_stack_pos--; + /* Rewrite any breaks/conts at this nesting level (haven't + * already had a BranchTarget assigned) to point to the end + * of the loop. + */ + for (j = loop_stack[loop_stack_pos]; j < i; j++) { + if (mesa_instructions[j].Opcode == OPCODE_BRK || + mesa_instructions[j].Opcode == OPCODE_CONT) { + if (mesa_instructions[j].BranchTarget == -1) { + mesa_instructions[j].BranchTarget = i; + } + } + } + /* The loop ends point at each other. 
*/ + mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; + mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; + break; + case OPCODE_CAL: + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (entry->sig_id == mesa_instructions[i].BranchTarget) { + mesa_instructions[i].BranchTarget = entry->inst; + break; + } + } + break; + default: + break; + } + } +} + +static void +print_program(struct prog_instruction *mesa_instructions, + ir_instruction **mesa_instruction_annotation, + int num_instructions) +{ + /*ir_instruction *last_ir = NULL;*/ + int i; + int indent = 0; + + for (i = 0; i < num_instructions; i++) { + struct prog_instruction *mesa_inst = mesa_instructions + i; + + fprintf(stdout, "%3d: ", i); + +#if 0 +/* Disable this for now, since printing GLSL IR along with its corresponding + * Mesa IR makes the Mesa IR unreadable. */ + ir_instruction *ir = mesa_instruction_annotation[i]; + if (last_ir != ir && ir) { + int j; + + for (j = 0; j < indent; j++) { + fprintf(stdout, " "); + } + ir->print(); + printf("\n"); + last_ir = ir; + + fprintf(stdout, " "); /* line number spacing. */ + } +#endif + + indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, + PROG_PRINT_DEBUG, NULL); + } +} + + +/** + * Count resources used by the given gpu program (number of texture + * samplers, etc). 
+ */ +static void +count_resources(struct gl_program *prog) +{ + unsigned int i; + + prog->SamplersUsed = 0; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = &prog->Instructions[i]; + + if (_mesa_is_tex_instruction(inst->Opcode)) { + prog->SamplerTargets[inst->TexSrcUnit] = + (gl_texture_index)inst->TexSrcTarget; + prog->SamplersUsed |= 1 << inst->TexSrcUnit; + if (inst->TexShadow) { + prog->ShadowSamplers |= 1 << inst->TexSrcUnit; + } + } + } + + _mesa_update_shader_textures_used(prog); +} + + +/** + * Check if the given vertex/fragment/shader program is within the + * resource limits of the context (number of texture units, etc). + * If any of those checks fail, record a linker error. + * + * XXX more checks are needed... + */ +static void +check_resources(const struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_program *prog) +{ + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxVertexTextureImageUnits) { + fail_link(shader_program, "Too many vertex shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many vertex shader constants"); + } + break; + case MESA_GEOMETRY_PROGRAM: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxGeometryTextureImageUnits) { + fail_link(shader_program, "Too many geometry shader texture samplers"); + } + if (prog->Parameters->NumParameters > + MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { + fail_link(shader_program, "Too many geometry shader constants"); + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxTextureImageUnits) { + fail_link(shader_program, "Too many fragment shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many fragment shader constants"); + } + break; + default: + _mesa_problem(ctx, "unexpected program type 
in check_resources()"); + } +} + + + +struct uniform_sort { + struct gl_uniform *u; + int pos; +}; + +/* The shader_program->Uniforms list is almost sorted in increasing + * uniform->{Frag,Vert}Pos locations, but not quite when there are + * uniforms shared between targets. We need to add parameters in + * increasing order for the targets. + */ +static int +sort_uniforms(const void *a, const void *b) +{ + struct uniform_sort *u1 = (struct uniform_sort *)a; + struct uniform_sort *u2 = (struct uniform_sort *)b; + + return u1->pos - u2->pos; +} + +/* Add the uniforms to the parameters. The linker chose locations + * in our parameters lists (which weren't created yet), which the + * uniforms code will use to poke values into our parameters list + * when uniforms are updated. + */ +static void +add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, + struct gl_shader *shader, + struct gl_program *prog) +{ + unsigned int i; + unsigned int next_sampler = 0, num_uniforms = 0; + struct uniform_sort *sorted_uniforms; + + sorted_uniforms = ralloc_array(NULL, struct uniform_sort, + shader_program->Uniforms->NumUniforms); + + for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { + struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; + int parameter_index = -1; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + parameter_index = uniform->VertPos; + break; + case GL_FRAGMENT_SHADER: + parameter_index = uniform->FragPos; + break; + case GL_GEOMETRY_SHADER: + parameter_index = uniform->GeomPos; + break; + } + + /* Only add uniforms used in our target. 
*/ + if (parameter_index != -1) { + sorted_uniforms[num_uniforms].pos = parameter_index; + sorted_uniforms[num_uniforms].u = uniform; + num_uniforms++; + } + } + + qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), + sort_uniforms); + + for (i = 0; i < num_uniforms; i++) { + struct gl_uniform *uniform = sorted_uniforms[i].u; + int parameter_index = sorted_uniforms[i].pos; + const glsl_type *type = uniform->Type; + unsigned int size; + + if (type->is_vector() || + type->is_scalar()) { + size = type->vector_elements; + } else { + size = type_size(type) * 4; + } + + gl_register_file file; + if (type->is_sampler() || + (type->is_array() && type->fields.array->is_sampler())) { + file = PROGRAM_SAMPLER; + } else { + file = PROGRAM_UNIFORM; + } + + GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, + uniform->Name); + + if (index < 0) { + index = _mesa_add_parameter(prog->Parameters, file, + uniform->Name, size, type->gl_type, + NULL, NULL, 0x0); + + /* Sampler uniform values are stored in prog->SamplerUnits, + * and the entry in that array is selected by this index we + * store in ParameterValues[]. + */ + if (file == PROGRAM_SAMPLER) { + for (unsigned int j = 0; j < size / 4; j++) + prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + } + + /* The location chosen in the Parameters list here (returned + * from _mesa_add_uniform) has to match what the linker chose. 
+ */ + if (index != parameter_index) { + fail_link(shader_program, "Allocation of uniform `%s' to target " + "failed (%d vs %d)\n", + uniform->Name, index, parameter_index); + } + } + } + + ralloc_free(sorted_uniforms); +} + +static void +set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, + struct gl_shader_program *shader_program, + const char *name, const glsl_type *type, + ir_constant *val) +{ + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, + field_type, field_constant); + field_constant = (ir_constant *)field_constant->next; + } + return; + } + + int loc = _mesa_get_uniform_location(ctx, shader_program, name); + + if (loc == -1) { + fail_link(shader_program, + "Couldn't find uniform for initializer %s\n", name); + return; + } + + for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { + ir_constant *element; + const glsl_type *element_type; + if (type->is_array()) { + element = val->array_elements[i]; + element_type = type->fields.array; + } else { + element = val; + element_type = type; + } + + void *values; + + if (element_type->base_type == GLSL_TYPE_BOOL) { + int *conv = ralloc_array(mem_ctx, int, element_type->components()); + for (unsigned int j = 0; j < element_type->components(); j++) { + conv[j] = element->value.b[j]; + } + values = (void *)conv; + element_type = glsl_type::get_instance(GLSL_TYPE_INT, + element_type->vector_elements, + 1); + } else { + values = &element->value; + } + + if (element_type->is_matrix()) { + _mesa_uniform_matrix(ctx, shader_program, + element_type->matrix_columns, + element_type->vector_elements, + loc, 1, GL_FALSE, (GLfloat *)values); + loc += element_type->matrix_columns; + } else { + _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, + values, element_type->gl_type); + loc += type_size(element_type); + } + } +} + +static void +set_uniform_initializers(struct gl_context *ctx, + struct gl_shader_program *shader_program) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_shader *shader = shader_program->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var || var->mode != ir_var_uniform || !var->constant_value) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, + var->type, var->constant_value); + } + } + + ralloc_free(mem_ctx); +} + +/* Replaces all references to a temporary register index with another index. 
*/
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_srcs = _mesa_num_inst_src_regs(inst->op);
+
+      /* Patch every source operand that names the old temporary... */
+      for (unsigned s = 0; s < num_srcs; s++) {
+         if (inst->src[s].file != PROGRAM_TEMPORARY)
+            continue;
+         if (inst->src[s].index == index)
+            inst->src[s].index = new_index;
+      }
+
+      /* ...and the destination, if it names it too. */
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
+         inst->dst.index = new_index;
+   }
+}
+
+/* Returns the position of the first instruction that reads temporary `index',
+ * or -1 if nothing reads it.  A read found inside a loop is reported as the
+ * position of the outermost enclosing BGNLOOP.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int loop_depth = 0;        /* current loop nesting depth */
+   int outer_loop_start = -1; /* position of the outermost open BGNLOOP */
+   int pos = 0;               /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_srcs = _mesa_num_inst_src_regs(inst->op);
+
+      for (unsigned s = 0; s < num_srcs; s++) {
+         if (inst->src[s].file != PROGRAM_TEMPORARY ||
+             inst->src[s].index != index)
+            continue;
+
+         if (loop_depth == 0)
+            return pos;
+         return outer_loop_start;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (loop_depth == 0)
+            outer_loop_start = pos;
+         loop_depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         loop_depth--;
+         if (loop_depth == 0)
+            outer_loop_start = -1;
+      }
+      assert(loop_depth >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/* Returns the position of the first instruction that writes temporary
+ * `index', or -1 if nothing writes it.  A write found inside a loop is
+ * reported as the position of the outermost enclosing BGNLOOP.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int loop_depth = 0;        /* current loop nesting depth */
+   int outer_loop_start = -1; /* position of the outermost open BGNLOOP */
+   int pos = 0;               /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const bool writes_temp = inst->dst.file == PROGRAM_TEMPORARY &&
+                               inst->dst.index == index;
+
+      if (writes_temp) {
+         if (loop_depth == 0)
+            return pos;
+         return outer_loop_start;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         if (loop_depth == 0)
+            outer_loop_start = pos;
+         loop_depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         loop_depth--;
+         if (loop_depth == 0)
+            outer_loop_start = -1;
+      }
+      assert(loop_depth >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/* Returns the position of the last instruction that reads temporary `index',
+ * or -1 if nothing reads it.  A read inside a loop is reported as the
+ * position of the ENDLOOP that closes the outermost enclosing loop.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int loop_depth = 0; /* current loop nesting depth */
+   int last_read = -1; /* result so far; -2 marks "read inside an open loop" */
+   int pos = 0;        /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_srcs = _mesa_num_inst_src_regs(inst->op);
+
+      for (unsigned s = 0; s < num_srcs; s++) {
+         if (inst->src[s].file != PROGRAM_TEMPORARY ||
+             inst->src[s].index != index)
+            continue;
+
+         if (loop_depth == 0)
+            last_read = pos;
+         else
+            last_read = -2;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         loop_depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         loop_depth--;
+         /* Leaving the outermost loop resolves a pending in-loop read. */
+         if (loop_depth == 0 && last_read == -2)
+            last_read = pos;
+      }
+      assert(loop_depth >= 0);
+
+      pos++;
+   }
+
+   assert(last_read >= -1);
+   return last_read;
+}
+
+/* Returns the position of the last instruction that writes temporary
+ * `index', or -1 if nothing writes it.  A write inside a loop is reported as
+ * the position of the ENDLOOP that closes the outermost enclosing loop.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int loop_depth = 0;  /* current loop nesting depth */
+   int last_write = -1; /* result so far; -2 marks "write inside an open loop" */
+   int pos = 0;         /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         if (loop_depth == 0)
+            last_write = pos;
+         else
+            last_write = -2;
+      }
+
+      if (inst->op == OPCODE_BGNLOOP) {
+         loop_depth++;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         loop_depth--;
+         /* Leaving the outermost loop resolves a pending in-loop write. */
+         if (loop_depth == 0 && last_write == -2)
+            last_write = pos;
+      }
+      assert(loop_depth >= 0);
+
+      pos++;
+   }
+
+   assert(last_write >= -1);
+   return last_write;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ *
+ * Bookkeeping: acp ("available copies") holds one slot per temporary
+ * channel (next_temp * 4 slots).  A non-NULL slot points at the MOV that
+ * last wrote that channel and is still valid.  acp_level records the
+ * IF-nesting level at which each slot was filled, so that slots filled
+ * inside an IF/ELSE body can be dropped when that body ends.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+                                                    glsl_to_tgsi_instruction *,
+                                                    this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0; /* current IF-nesting depth */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs.
+       * All three source slots (src[0]..src[2]) are examined.
+       */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4; /* only used once the file check below passes */
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+                   first->src[0].index != copy_chan->src[0].index) {
+                  good = false;
+                  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            /* Compose the swizzles: for each channel read here, look up the
+             * channel the recorded MOV read from its own source.
+             */
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+                           (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case OPCODE_BGNLOOP:
+      case OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case OPCODE_IF:
+         ++level;
+         break;
+
+      case OPCODE_ENDIF:
+      case OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+                  continue;
+
+               if (acp_level[4 * r + c] >= level)
+                  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+                    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+                     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+                    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+                  if (inst->dst.writemask & (1 << c)) {
+                     acp[4 * inst->dst.index + c] = NULL;
+                  }
+               }
+            }
+
+            /* Clear where it's used as src.  A recorded copy is dropped
+             * when the channel its MOV read from is overwritten here.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+                      acp[4 * r + c]->src[0].index == inst->dst.index &&
+                      inst->dst.writemask & (1 << src_chan))
+                  {
+                     acp[4 * r + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP.  Only plain MOVs qualify:
+       * no relative addressing on either side, no saturate, no negate.
+       */
+      if (inst->op == OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
As an example, a TXP production after copy propagation but + * before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * and after this pass: + * + * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) + * FIXME: doesn't eliminate all dead code inside of loops; it steps around them + */ +void +glsl_to_tgsi_visitor::eliminate_dead_code(void) +{ + int i; + + for (i=0; i < this->next_temp; i++) { + int last_read = get_last_temp_read(i); + int j = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && + j > last_read) + { + iter.remove(); + delete inst; + } + + j++; + } + } +} + +/* Merges temporary registers together where possible to reduce the number of + * registers needed to run a program. + * + * Produces optimal code only after copy propagation and dead code elimination + * have been run. */ +void +glsl_to_tgsi_visitor::merge_registers(void) +{ + int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int i, j; + + /* Read the indices of the last read and first write to each temp register + * into an array so that we don't have to traverse the instruction list as + * much. */ + for (i=0; i < this->next_temp; i++) { + last_reads[i] = get_last_temp_read(i); + first_writes[i] = get_first_temp_write(i); + } + + /* Start looking for registers with non-overlapping usages that can be + * merged together. */ + for (i=0; i < this->next_temp - 1; i++) { + /* Don't touch unused registers. */ + if (last_reads[i] < 0 || first_writes[i] < 0) continue; + + for (j=i+1; j < this->next_temp; j++) { + /* Don't touch unused registers. 
*/ + if (last_reads[j] < 0 || first_writes[j] < 0) continue; + + /* We can merge the two registers if the first write to j is after or + * in the same instruction as the last read from i. Note that the + * register at index i will always be used earlier or at the same time + * as the register at index j. */ + assert(first_writes[i] <= first_writes[j]); + if (last_reads[i] <= first_writes[j]) { + rename_temp_register(j, i); /* Replace all references to j with i.*/ + + /* Update the first_writes and last_reads arrays with the new + * values for the merged register index, and mark the newly unused + * register index as such. */ + last_reads[i] = last_reads[j]; + first_writes[j] = -1; + last_reads[j] = -1; + } + } + } + + ralloc_free(last_reads); + ralloc_free(first_writes); +} + +/* Reassign indices to temporary registers by reusing unused indices created + * by optimization passes. */ +void +glsl_to_tgsi_visitor::renumber_registers(void) +{ + int i = 0; + int new_index = 0; + + for (i=0; i < this->next_temp; i++) { + if (get_first_temp_read(i) < 0) continue; + if (i != new_index) + rename_temp_register(i, new_index); + new_index++; + } + + this->next_temp = new_index; +} + +/* ------------------------- TGSI conversion stuff -------------------------- */ +struct label { + unsigned branch_target; + unsigned token; +}; + +/** + * Intermediate state used during shader translation. 
+ */
+struct st_translate {
+   struct ureg_program *ureg;
+
+   struct ureg_dst temps[MAX_PROGRAM_TEMPS]; /* indexed by Mesa temporary index; declared lazily by dst_register()/src_register() */
+   struct ureg_src *constants; /* indexed by parameter index in src_register() */
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; /* indexed through outputMapping[] */
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; /* indexed through inputMapping[] */
+   struct ureg_dst address[1]; /* address[0] supplies the index for relative addressing in translate_dst()/translate_src() */
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS]; /* indexed by the instruction's sampler number */
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Temp point size output register */
+   GLboolean prevInstWrotePointSize; /* set by dst_register() when a vertex program writes VERT_RESULT_PSIZ */
+
+   const GLuint *inputMapping; /* Mesa input index -> slot in inputs[] */
+   const GLuint *outputMapping; /* Mesa output index -> slot in outputs[] */
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels; /* grown with realloc() by get_label() */
+   unsigned labels_size; /* allocated entries in labels[] */
+   unsigned labels_count; /* entries of labels[] in use */
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn; /* grown with realloc() by set_insn_start() */
+   unsigned insn_size; /* allocated entries in insn[] */
+   unsigned insn_count; /* entries of insn[] in use */
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT; dst_register() treats any other value as geometry */
+
+   boolean error; /* set to TRUE when growing labels[] or insn[] fails */
+};
+
+/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE, /* NOTE(review): presumably listed in SYSTEM_VALUE_x enum order -- confirm */
+   TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ */
+static unsigned *get_label( struct st_translate *t,
+                            unsigned branch_target )
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      /* Grow to the next power of two.  The result of realloc() goes into
+       * a temporary so that a failed allocation neither leaks the old
+       * array nor leaves t->labels NULL while labels_count is non-zero.
+       */
+      unsigned new_size = 1u << (util_logbase2(t->labels_size) + 1);
+      struct label *new_labels =
+         (struct label *)realloc(t->labels, new_size * sizeof t->labels[0]);
+
+      if (new_labels == NULL) {
+         /* Hand back a scratch slot so the caller still has somewhere to
+          * write; the translation is flagged as failed.
+          */
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+
+      t->labels = new_labels;
+      t->labels_size = new_size;
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next Mesa instruction points to
+ * the next TGSI instruction.
+ *
+ * \param t      translation state whose insn[] array is appended to
+ * \param start  TGSI instruction number recorded for this Mesa instruction
+ *
+ * On allocation failure t->error is set, nothing is recorded, and the
+ * existing insn[] contents are left intact.
+ */
+static void set_insn_start( struct st_translate *t,
+                            unsigned start )
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      /* Same growth scheme as get_label(): only commit the new buffer and
+       * size once realloc() has succeeded.
+       */
+      unsigned new_size = 1u << (util_logbase2(t->insn_size) + 1);
+      unsigned *new_insn =
+         (unsigned *)realloc(t->insn, new_size * sizeof t->insn[0]);
+
+      if (new_insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+
+      t->insn = new_insn;
+      t->insn_size = new_size;
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Map a Mesa dst register to a TGSI ureg_dst register.
+ */ +static struct ureg_dst +dst_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_dst_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + + return t->temps[index]; + + case PROGRAM_OUTPUT: + if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) + t->prevInstWrotePointSize = GL_TRUE; + + if (t->procType == TGSI_PROCESSOR_VERTEX) + assert(index < VERT_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < GEOM_RESULT_MAX); + + assert(t->outputMapping[index] < Elements(t->outputs)); + + return t->outputs[t->outputMapping[index]]; + + case PROGRAM_ADDRESS: + return t->address[index]; + + default: + debug_assert( 0 ); + return ureg_dst_undef(); + } +} + +/** + * Map a Mesa src register to a TGSI ureg_src register. + */ +static struct ureg_src +src_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + return ureg_src(t->temps[index]); + + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_UNIFORM: + assert(index >= 0); + return t->constants[index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: /* ie, immediate */ + if (index < 0) + return ureg_DECL_constant( t->ureg, 0 ); + else + return t->constants[index]; + + case PROGRAM_INPUT: + assert(t->inputMapping[index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[index]]; + + case PROGRAM_OUTPUT: + assert(t->outputMapping[index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? 
*/ + + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); + + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + + default: + debug_assert( 0 ); + return ureg_src_undef(); + } +} + +/** + * Create a TGSI ureg_dst register from a Mesa dest register. + */ +static struct ureg_dst +translate_dst( struct st_translate *t, + const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + boolean saturate ) +{ + struct ureg_dst dst = dst_register( t, + dst_reg->file, + dst_reg->index ); + + dst = ureg_writemask( dst, + dst_reg->writemask ); + + if (saturate) + dst = ureg_saturate( dst ); + + if (dst_reg->reladdr != NULL) + dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + + return dst; +} + +/** + * Create a TGSI ureg_src register from a Mesa src register. + */ +static struct ureg_src +translate_src( struct st_translate *t, + const st_src_reg *src_reg ) +{ + struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + + src = ureg_swizzle( src, + GET_SWZ( src_reg->swizzle, 0 ) & 0x3, + GET_SWZ( src_reg->swizzle, 1 ) & 0x3, + GET_SWZ( src_reg->swizzle, 2 ) & 0x3, + GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + + if ((src_reg->negate & 0xf) == NEGATE_XYZW) + src = ureg_negate(src); + +#if 0 + // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR + if (src_reg->abs) + src = ureg_abs(src); +#endif + + if (src_reg->reladdr != NULL) { + /* Normally ureg_src_indirect() would be used here, but a stupid compiler + * bug in g++ makes ureg_src_indirect (an inline C function) erroneously + * set the bit for src.Negate. So we have to do the operation manually + * here to work around the compiler's problems. 
*/ + /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ + struct ureg_src addr = ureg_src(t->address[0]); + src.Indirect = 1; + src.IndirectFile = addr.File; + src.IndirectIndex = addr.Index; + src.IndirectSwizzle = addr.SwizzleX; + + if (src_reg->file != PROGRAM_INPUT && + src_reg->file != PROGRAM_OUTPUT) { + /* If src_reg->index was negative, it was set to zero in + * src_register(). Reassign it now. But don't do this + * for input/output regs since they get remapped while + * const buffers don't. + */ + src.Index = src_reg->index; + } + } + + return src; +} + +static void +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) +{ + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + + num_dst = _mesa_num_inst_dst_regs( inst->op ); + num_src = _mesa_num_inst_src_regs( inst->op ); + + if (num_dst) + dst[0] = translate_dst( t, + &inst->dst, + inst->saturate); // inst->SaturateMode + + for (i = 0; i < num_src; i++) + src[i] = translate_src( t, &inst->src[i] ); + + switch( inst->op ) { + case OPCODE_SWZ: + // TODO: copy emit_swz function from st_mesa_to_tgsi.c + //emit_swz( t, dst[0], &inst->src[0] ); + assert(!"OPCODE_SWZ"); + return; + + case OPCODE_BGNLOOP: + case OPCODE_CAL: + case OPCODE_ELSE: + case OPCODE_ENDLOOP: + case OPCODE_IF: + debug_assert(num_dst == 0); + ureg_label_insn( ureg, + translate_opcode( inst->op ), + src, num_src, + get_label( t, + inst->op == OPCODE_CAL ? 
inst->function->sig_id : 0 )); + return; + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + src[num_src++] = t->samplers[inst->sampler]; + ureg_tex_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + translate_texture_target( inst->tex_target, + inst->tex_shadow ), + src, num_src ); + return; + + case OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_XPD: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + break; + + case OPCODE_DDY: + // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c + assert(!"OPCODE_DDY"); + //emit_ddy( t, dst[0], &inst->src[0] ); + break; + + default: + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + } +} + +/** + * Emit the TGSI instructions to adjust the WPOS pixel center convention + * Basically, add (adjX, adjY) to the fragment position. + */ +static void +emit_adjusted_wpos( struct st_translate *t, + const struct gl_program *program, + GLfloat adjX, GLfloat adjY) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. 
+ */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); + + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). + */ +static void +emit_wpos_inversion( struct st_translate *t, + const struct gl_program *program, + boolean invert) +{ + struct ureg_program *ureg = t->ureg; + + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, + (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); + + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp; + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* MOV wpos_temp, input[wpos] + */ + if (wpos_input.File == TGSI_FILE_TEMPORARY) + wpos_temp = ureg_dst(wpos_input); + else { + wpos_temp = ureg_DECL_temporary( ureg ); + ureg_MOV( ureg, wpos_temp, wpos_input ); + } + + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } + + /* Use wpos_temp as position input from 
here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit fragment position/ooordinate code. + */ +static void +emit_wpos(struct st_context *st, + struct st_translate *t, + const struct gl_program *program, + struct ureg_program *ureg) +{ + const struct gl_fragment_program *fp = + (const struct gl_fragment_program *) program; + struct pipe_screen *pscreen = st->pipe->screen; + boolean invert = FALSE; + + if (fp->OriginUpperLeft) { + /* Fragment shader wants origin in upper-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { + /* the driver supports upper-left origin */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { + /* the driver supports lower-left origin, need to invert Y */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + invert = TRUE; + } + else + assert(0); + } + else { + /* Fragment shader wants origin in lower-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) + /* the driver supports lower-left origin */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) + /* the driver supports upper-left origin, need to invert Y */ + invert = TRUE; + else + assert(0); + } + + if (fp->PixelCenterInteger) { + /* Fragment shader wants pixel center integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + /* the driver supports pixel center integer */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + /* the driver supports pixel center half integer, need to bias X,Y */ + emit_adjusted_wpos(t, program, 0.5f, invert ? 
0.5f : -0.5f); + else + assert(0); + } + else { + /* Fragment shader wants pixel center half integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { + /* the driver supports pixel center half integer */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { + /* the driver supports pixel center integer, need to bias X,Y */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); + } + else + assert(0); + } + + /* we invert after adjustment so that we avoid the MOV to temporary, + * and reuse the adjustment ADD instead */ + emit_wpos_inversion(t, program, invert); +} + +/** + * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. + * \param program the program to translate + * \param numInputs number of input registers used + * \param inputMapping maps Mesa fragment program inputs to TGSI generic + * input indexes + * \param inputSemanticName the TGSI_SEMANTIC flag for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input + * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param numOutputs number of output registers used + * \param outputMapping maps Mesa fragment program outputs to TGSI + * generic outputs + * \param outputSemanticName the TGSI_SEMANTIC flag for each output + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output + * + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY + */ +extern "C" enum pipe_error +st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + 
const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags ) +{ + struct st_translate translate, *t; + unsigned i; + enum pipe_error ret = PIPE_OK; + + assert(numInputs <= Elements(t->inputs)); + assert(numOutputs <= Elements(t->outputs)); + + t = &translate; + memset(t, 0, sizeof *t); + + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->pointSizeOutIndex = -1; + t->prevInstWrotePointSize = GL_FALSE; + + /*_mesa_print_program(program);*/ + + /* + * Declare input attributes. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + if (proginfo->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + printf("FRAG_BIT_WPOS\n"); + emit_wpos(st_context(ctx), t, proginfo, ureg); + } + + if (proginfo->InputsRead & FRAG_BIT_FACE) { + // TODO: uncomment + printf("FRAG_BIT_FACE\n"); + //emit_face_var( t, program ); + } + + /* + * Declare output attributes. 
+ */ + for (i = 0; i < numOutputs; i++) { + switch (outputSemanticName[i]) { + case TGSI_SEMANTIC_POSITION: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_POSITION, /* Z / Depth */ + outputSemanticIndex[i] ); + + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Z ); + break; + case TGSI_SEMANTIC_STENCIL: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i] ); + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Y ); + break; + case TGSI_SEMANTIC_COLOR: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i] ); + break; + default: + debug_assert(0); + return PIPE_ERROR_BAD_INPUT; + } + } + } + else if (procType == TGSI_PROCESSOR_GEOMETRY) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_gs_input(ureg, + i, + inputSemanticName[i], + inputSemanticIndex[i]); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + } + } + else { + assert(procType == TGSI_PROCESSOR_VERTEX); + + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { + /* Writing to the point size result register requires special + * handling to implement clamping. + */ + static const gl_state_index pointSizeClampState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed. 
+ * XXX: depends on "Parameters" field specific to Mesa IR + */ + unsigned pointSizeClampConst = + _mesa_add_state_reference(proginfo->Parameters, + pointSizeClampState); + struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); + t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + t->pointSizeResult = t->outputs[i]; + t->pointSizeOutIndex = i; + t->outputs[i] = psizregtemp; + } + } + /*if (passthrough_edgeflags) + emit_edgeflags( t, program ); */ // TODO: uncomment + } + + /* Declare address register. + */ + if (program->num_address_regs > 0) { + debug_assert( program->num_address_regs == 1 ); + t->address[0] = ureg_DECL_address( ureg ); + } + + /* Declare misc input registers + */ + { + GLbitfield sysInputs = proginfo->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + + if (program->indirect_addr_temps) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + * (Note: the number of temporaries is equal to program->next_temp) + */ + for (i = 0; i < (unsigned)program->next_temp; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary( t->ureg ); + } + } + + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. 
+ * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + */ + if (proginfo->Parameters) { + t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + switch (proginfo->Parameters->Parameters[i].Type) { + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant( ureg, i ); + break; + + /* Emit immediates only when there's no indirect addressing of + * the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ + case PROGRAM_CONSTANT: + if (program->indirect_addr_consts) + t->constants[i] = ureg_DECL_constant( ureg, i ); + else + t->constants[i] = + ureg_DECL_immediate( ureg, + proginfo->Parameters->ParameterValues[i], + 4 ); + break; + default: + break; + } + } + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + // XXX: depends on SamplersUsed property generated by conversion to Mesa IR + if (proginfo->SamplersUsed & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler( ureg, i ); + } + } + + /* Emit each instruction in turn: + */ + foreach_iter(exec_list_iterator, iter, program->instructions) { + set_insn_start( t, ureg_get_instruction_number( ureg )); + compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + + if (t->prevInstWrotePointSize && proginfo->Id) { + /* The previous instruction wrote to the (fake) vertex point size + * result register. Now we need to clamp that value to the min/max + * point size range, putting the result into the real point size + * register. + * Note that we can't do this easily at the end of program due to + * possible early return. 
+ */ + set_insn_start( t, ureg_get_instruction_number( ureg )); + ureg_MAX( t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + } + t->prevInstWrotePointSize = GL_FALSE; + } + + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label( ureg, + t->labels[i].token, + t->insn[t->labels[i].branch_target] ); + } + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } + + return ret; +} +/* ----------------------------- End TGSI code ------------------------------ */ + +/** + * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + */ +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + struct prog_instruction *mesa_instructions, *mesa_inst; + ir_instruction **mesa_instruction_annotation; + int i; + struct gl_program *prog; + GLenum target; + const char *target_string; + GLboolean progress; + struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); + 
if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v->ctx = ctx; + v->prog = prog; + v->shader_program = shader_program; + v->options = options; + + add_uniforms_to_parameters_list(shader_program, shader, prog); + + /* Emit Mesa IR for main(). */ + visit_exec_list(shader->ir, v); + v->emit(NULL, OPCODE_END); + + /* Now emit bodies for any functions that were used. */ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v->current_function = entry; + + entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, v); + + glsl_to_tgsi_instruction *last; + last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); + if (last->op != OPCODE_RET) + v->emit(NULL, OPCODE_RET); + + glsl_to_tgsi_instruction *end; + end = v->emit(NULL, OPCODE_ENDSUB); + end->function = entry; + + progress = GL_TRUE; + } + } + } while (progress); + +#if 0 + /* Print out some information (for debugging purposes) used by the + * optimization passes. */ + for (i=0; i < v->next_temp; i++) { + int fr = v->get_first_temp_read(i); + int fw = v->get_first_temp_write(i); + int lr = v->get_last_temp_read(i); + int lw = v->get_last_temp_write(i); + + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); + assert(fw <= fr); + } +#endif + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
*/ + v->copy_propagate(); + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); + + prog->NumTemporaries = v->next_temp; + + int num_instructions = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + num_instructions++; + } + + mesa_instructions = + (struct prog_instruction *)calloc(num_instructions, + sizeof(*mesa_instructions)); + mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, + num_instructions); + + /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. + * TODO: remove + */ + mesa_inst = mesa_instructions; + i = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + mesa_inst->Opcode = inst->op; + mesa_inst->CondUpdate = inst->cond_update; + if (inst->saturate) + mesa_inst->SaturateMode = SATURATE_ZERO_ONE; + mesa_inst->DstReg.File = inst->dst.file; + mesa_inst->DstReg.Index = inst->dst.index; + mesa_inst->DstReg.CondMask = inst->dst.cond_mask; + mesa_inst->DstReg.WriteMask = inst->dst.writemask; + mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; + mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); + mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); + mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); + mesa_inst->TexSrcUnit = inst->sampler; + mesa_inst->TexSrcTarget = inst->tex_target; + mesa_inst->TexShadow = inst->tex_shadow; + mesa_instruction_annotation[i] = inst->ir; + + /* Set IndirectRegisterFiles. 
*/ + if (mesa_inst->DstReg.RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; + + /* Update program's bitmask of indirectly accessed register files */ + for (unsigned src = 0; src < 3; src++) + if (mesa_inst->SrcReg[src].RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; + + if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { + fail_link(shader_program, "Couldn't flatten if statement\n"); + } + + switch (mesa_inst->Opcode) { + case OPCODE_BGNSUB: + inst->function->inst = i; + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_ENDSUB: + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_CAL: + mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ + break; + case OPCODE_ARL: + prog->NumAddressRegs = 1; + break; + default: + break; + } + + mesa_inst++; + i++; + + if (!shader_program->LinkStatus) + break; + } + + if (!shader_program->LinkStatus) { + free(mesa_instructions); + _mesa_reference_program(ctx, &shader->Program, NULL); + return NULL; + } + + set_branchtargets(v, mesa_instructions, num_instructions); + + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("\n"); + printf("GLSL IR for linked %s program %d:\n", target_string, + shader_program->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + printf("\n"); + printf("Mesa IR for linked %s program %d:\n", target_string, + shader_program->Name); + print_program(mesa_instructions, mesa_instruction_annotation, + num_instructions); + } + + prog->Instructions = mesa_instructions; + prog->NumInstructions = num_instructions; + + do_set_program_inouts(shader->ir, prog); + count_resources(prog); + + check_resources(ctx, shader_program, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { + _mesa_optimize_program(ctx, prog); + } + + struct st_vertex_program *stvp; + struct st_fragment_program *stfp; + struct 
st_geometry_program *stgp; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + stvp = (struct st_vertex_program *)prog; + stvp->glsl_to_tgsi = v; + break; + case GL_FRAGMENT_SHADER: + stfp = (struct st_fragment_program *)prog; + stfp->glsl_to_tgsi = v; + break; + case GL_GEOMETRY_SHADER: + stgp = (struct st_geometry_program *)prog; + stgp->glsl_to_tgsi = v; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + return prog; +} + +extern "C" { + +struct gl_shader * +st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + _mesa_init_shader(ctx, shader); + } + return shader; +} + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct gl_shader_program *shProg; + shProg = rzalloc(NULL, struct gl_shader_program); + if (shProg) { + shProg->Name = name; + _mesa_init_shader_program(ctx, shProg); + } + return shProg; +} + +/** + * Link a shader. + * Called via ctx->Driver.LinkShader() + * This actually involves converting GLSL IR into Mesa gl_programs with + * code lowering and other optimizations. + */ +GLboolean +st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + assert(prog->LinkStatus); + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + const struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + + do { + progress = false; + + /* Lowering */ + do_mat_op_to_vec(ir); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? 
POW_TO_EXP2 : 0))); + + progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; + + progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + + progress = lower_quadop_vector(ir, true) || progress; + + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; + progress = lower_if_to_cond_assign(ir) || progress; + } + + if (options->EmitNoNoise) + progress = lower_noise(ir) || progress; + + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput + || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) + progress = + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform) + || progress; + + progress = do_vec_index_to_cond_assign(ir) || progress; + } while (progress); + + validate_ir_tree(ir); + } + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_program *linked_prog; + + if (prog->_LinkedShaders[i] == NULL) + continue; + + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, 
GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } + } + + _mesa_reference_program(ctx, &linked_prog, NULL); + } + + return GL_TRUE; +} + + +/** + * Link a GLSL shader program. Called via glLinkProgram(). + */ +void +st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + fail_link(prog, "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(ctx, prog); + } + + if (prog->LinkStatus) { + if (!ctx->Driver.LinkShader(ctx, prog)) { + prog->LinkStatus = GL_FALSE; + } + } + + set_uniform_initializers(ctx, prog); + + if (ctx->Shader.Flags & GLSL_DUMP) { + if (!prog->LinkStatus) { + printf("GLSL shader program %d failed to link\n", prog->Name); + } + + if (prog->InfoLog && prog->InfoLog[0] != 0) { + printf("GLSL shader program %d info log:\n", prog->Name); + printf("%s\n", prog->InfoLog); + } + } +} + +} /* extern "C" */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h new file mode 100644 index 00000000000..e21c0d1e0af --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or 
sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "tgsi/tgsi_ureg.h" + +struct gl_context; +struct gl_shader; +struct gl_shader_program; +struct glsl_to_tgsi_visitor; + +enum pipe_error st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + struct glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags); + +void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); + +struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name); + +void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); +GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + +#ifdef __cplusplus +} +#endif diff --git 
a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a41e5b16a85..75842286ba8 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -267,7 +267,7 @@ src_register( struct st_translate *t, /** * Map mesa texture target to TGSI texture target. */ -static unsigned +unsigned translate_texture_target( GLuint textarget, GLboolean shadow ) { @@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t, -static unsigned +unsigned translate_opcode( unsigned op ) { switch( op ) { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 0615e52ef62..0dbdf5f6159 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -64,6 +64,12 @@ st_translate_mesa_program( void st_free_tokens(const struct tgsi_token *tokens); +unsigned +translate_opcode(unsigned op); + +unsigned +translate_texture_target(GLuint textarget, GLboolean shadow); + #if defined __cplusplus } /* extern "C" */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 7a6d33d3fea..dd618424d66 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. 
*/ -static void -st_prepare_vertex_program(struct st_context *st, +void +st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { GLuint attr; @@ -184,7 +184,7 @@ st_prepare_vertex_program(struct st_context *st, stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, &stvp->Base); + _mesa_insert_mvp_code(ctx, &stvp->Base); assert(stvp->Base.Base.NumInstructions > 1); @@ -292,7 +292,7 @@ st_translate_vertex_program(struct st_context *st, enum pipe_error error; unsigned num_outputs; - st_prepare_vertex_program( st, stvp ); + st_prepare_vertex_program(st->ctx, stvp); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); @@ -318,22 +318,41 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - ureg, - &stvp->Base.Base, - /* inputs */ - vpv->num_inputs, - stvp->input_to_index, - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, - /* outputs */ - num_outputs, - stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags ); + if (stvp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + stvp->glsl_to_tgsi, + &stvp->Base.Base, + /* inputs */ + stvp->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, /* interp mode */ + /* outputs */ + stvp->num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + &stvp->Base.Base, + /* inputs */ + vpv->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, + /* outputs */ + num_outputs, + stvp->result_to_output, + 
stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); if (error) goto fail; @@ -393,6 +412,151 @@ st_get_vp_variant(struct st_context *st, return vpv; } +/** + * Translate Mesa fragment shader attributes to TGSI attributes. + * \return GL_TRUE if color output should be written to all render targets, + * GL_FALSE if not + */ +GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp) +{ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLboolean write_all = GL_FALSE; + + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = stfp->num_inputs++; + + stfp->input_to_index[attr] = slot; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 1; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_FACE: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT; + break; + /* In most cases, there is nothing special about these + * inputs, so adopt a convention to use the generic + * semantic name and the mesa FRAG_ATTRIB_ number as the + * index. 
+ * + * All that is required is that the vertex shader labels + * its own outputs similarly, and that the vertex shader + * generates at least every output required by the + * fragment shader plus fixed-function hardware (such as + * BFC). + * + * There is no requirement that semantic indexes start at + * zero or be restricted to a particular range -- nobody + * should be building tables based on semantic index. + */ + case FRAG_ATTRIB_PNTC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + case FRAG_ATTRIB_VAR0: + default: + /* Actually, let's try and zero-base this just for + * readability of the generated TGSI. + */ + assert(attr >= FRAG_ATTRIB_TEX0); + stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + if (attr == FRAG_ATTRIB_PNTC) + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + else + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + } + } + else { + stfp->input_to_index[attr] = -1; + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); + } + + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); + 
} + + /* handle remaning outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & BITFIELD64_BIT(attr)) { + switch (attr) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLOR: + write_all = GL_TRUE; /* fallthrough */ + default: + assert(attr == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR; + stfp->output_semantic_index[stfp->num_outputs] = numColors; + stfp->result_to_output[attr] = stfp->num_outputs; + numColors++; + break; + } + + stfp->num_outputs++; + } + } + } + + return write_all; +} + /** * Translate a Mesa fragment shader into a TGSI shader using extra info in @@ -445,155 +609,12 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ - GLuint outputMapping[FRAG_RESULT_MAX]; - GLuint inputMapping[FRAG_ATTRIB_MAX]; - GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - GLuint attr; enum pipe_error error; - const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLboolean write_all = GL_FALSE; - - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - uint fs_num_inputs = 0; - - ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint fs_num_outputs = 0; - - + GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); - /* - * Convert Mesa program inputs to TGSI input register semantics. 
- */ - for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { - if (inputsRead & (1 << attr)) { - const GLuint slot = fs_num_inputs++; - - inputMapping[attr] = slot; - - switch (attr) { - case FRAG_ATTRIB_WPOS: - input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL0: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL1: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 1; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_FOGC: - input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case FRAG_ATTRIB_FACE: - input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa FRAG_ATTRIB_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. - */ - case FRAG_ATTRIB_PNTC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - case FRAG_ATTRIB_VAR0: - default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. 
- */ - assert(attr >= FRAG_ATTRIB_TEX0); - input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); - input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - if (attr == FRAG_ATTRIB_PNTC) - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - else - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - } - else { - inputMapping[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - { - uint numColors = 0; - GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - /* handle remaning outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { - switch (attr) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; /* fallthrough */ - default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; - fs_output_semantic_index[fs_num_outputs] = numColors; - outputMapping[attr] = fs_num_outputs; - numColors++; - break; - } - - fs_num_outputs++; - } - } - } - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return NULL; @@ -606,21 +627,39 @@ st_translate_fragment_program(struct st_context *st, if (write_all == GL_TRUE) 
ureg_property_fs_color0_writes_all_cbufs(ureg, 1); - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, FALSE ); + if (stfp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index c4244df939e..67723de6d53 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,6 +38,7 @@ #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" +#include "st_glsl_to_tgsi.h" /** Fragment program variant key */ @@ -83,6 +84,22 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */ + GLuint input_to_index[FRAG_ATTRIB_MAX]; + /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */ + GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; + ubyte 
input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + GLuint num_inputs; + GLuint interp_mode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ + + /** Maps FRAG_RESULT_x to slot */ + GLuint result_to_output[FRAG_RESULT_MAX]; + ubyte output_semantic_name[FRAG_RESULT_MAX]; + ubyte output_semantic_index[FRAG_RESULT_MAX]; + GLuint num_outputs; struct pipe_shader_state tgsi; @@ -136,6 +153,7 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[VERT_ATTRIB_MAX]; @@ -184,6 +202,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** map GP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; @@ -276,6 +295,14 @@ st_get_gp_variant(struct st_context *st, const struct st_gp_variant_key *key); +extern void +st_prepare_vertex_program(struct gl_context *ctx, + struct st_vertex_program *stvp); + +extern GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp); + extern void st_release_vp_variants( struct st_context *st, -- cgit v1.2.3 From 1e5fd8e480b661c1ab748c2ded587650ea7f3d20 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:00:24 -0500 Subject: mesa: fix segfault when no Mesa IR is generated --- src/mesa/program/program.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 78efca9f122..224446a2683 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) if (prog->String) free(prog->String); - _mesa_free_instructions(prog->Instructions, prog->NumInstructions); - + if 
(prog->Instructions) { + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); + } if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); } -- cgit v1.2.3 From 44867da3543ca54ef245695cef72a6e305451d93 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:24:57 -0500 Subject: glsl_to_tgsi: stop generating Mesa IR Before, it was still generating unused Mesa IR as a remnant of ir_to_mesa, and depended on some of the information from it. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 290 +++-------------------------- src/mesa/state_tracker/st_program.c | 13 +- 2 files changed, 33 insertions(+), 270 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e1102503ee0..c562abc96c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -254,8 +254,9 @@ public: struct gl_shader_compiler_options *options; int next_temp; - + int num_address_regs; + int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; @@ -2310,170 +2311,23 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) delete v; } -static struct prog_src_register -mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) -{ - struct prog_src_register mesa_reg; - - mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS)); - mesa_reg.Index = reg.index; - mesa_reg.Swizzle = reg.swizzle; - mesa_reg.RelAddr = reg.reladdr != NULL; - mesa_reg.Negate = reg.negate; - mesa_reg.Abs = 0; - mesa_reg.HasIndex2 = GL_FALSE; - mesa_reg.RelAddr2 = 0; - mesa_reg.Index2 = 0; - - return mesa_reg; -} - -static void -set_branchtargets(glsl_to_tgsi_visitor *v, - struct prog_instruction *mesa_instructions, - int num_instructions) -{ - int if_count = 0, loop_count = 0; - int *if_stack, *loop_stack; - int if_stack_pos = 0, loop_stack_pos = 0; - int i, j; - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case 
OPCODE_IF: - if_count++; - break; - case OPCODE_BGNLOOP: - loop_count++; - break; - case OPCODE_BRK: - case OPCODE_CONT: - mesa_instructions[i].BranchTarget = -1; - break; - default: - break; - } - } - - if_stack = rzalloc_array(v->mem_ctx, int, if_count); - loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_stack[if_stack_pos] = i; - if_stack_pos++; - break; - case OPCODE_ELSE: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack[if_stack_pos - 1] = i; - break; - case OPCODE_ENDIF: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack_pos--; - break; - case OPCODE_BGNLOOP: - loop_stack[loop_stack_pos] = i; - loop_stack_pos++; - break; - case OPCODE_ENDLOOP: - loop_stack_pos--; - /* Rewrite any breaks/conts at this nesting level (haven't - * already had a BranchTarget assigned) to point to the end - * of the loop. - */ - for (j = loop_stack[loop_stack_pos]; j < i; j++) { - if (mesa_instructions[j].Opcode == OPCODE_BRK || - mesa_instructions[j].Opcode == OPCODE_CONT) { - if (mesa_instructions[j].BranchTarget == -1) { - mesa_instructions[j].BranchTarget = i; - } - } - } - /* The loop ends point at each other. 
*/ - mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; - mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; - break; - case OPCODE_CAL: - foreach_iter(exec_list_iterator, iter, v->function_signatures) { - function_entry *entry = (function_entry *)iter.get(); - - if (entry->sig_id == mesa_instructions[i].BranchTarget) { - mesa_instructions[i].BranchTarget = entry->inst; - break; - } - } - break; - default: - break; - } - } -} - -static void -print_program(struct prog_instruction *mesa_instructions, - ir_instruction **mesa_instruction_annotation, - int num_instructions) -{ - /*ir_instruction *last_ir = NULL;*/ - int i; - int indent = 0; - - for (i = 0; i < num_instructions; i++) { - struct prog_instruction *mesa_inst = mesa_instructions + i; - - fprintf(stdout, "%3d: ", i); - -#if 0 -/* Disable this for now, since printing GLSL IR along with its corresponding - * Mesa IR makes the Mesa IR unreadable. */ - ir_instruction *ir = mesa_instruction_annotation[i]; - if (last_ir != ir && ir) { - int j; - - for (j = 0; j < indent; j++) { - fprintf(stdout, " "); - } - ir->print(); - printf("\n"); - last_ir = ir; - - fprintf(stdout, " "); /* line number spacing. */ - } -#endif - - indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, - PROG_PRINT_DEBUG, NULL); - } -} - /** * Count resources used by the given gpu program (number of texture * samplers, etc). 
*/ static void -count_resources(struct gl_program *prog) +count_resources(glsl_to_tgsi_visitor *v) { - unsigned int i; + v->samplers_used = 0; - prog->SamplersUsed = 0; - - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = &prog->Instructions[i]; + foreach_iter(exec_list_iterator, iter, v->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (_mesa_is_tex_instruction(inst->Opcode)) { - prog->SamplerTargets[inst->TexSrcUnit] = - (gl_texture_index)inst->TexSrcTarget; - prog->SamplersUsed |= 1 << inst->TexSrcUnit; - if (inst->TexShadow) { - prog->ShadowSamplers |= 1 << inst->TexSrcUnit; - } + if (_mesa_is_tex_instruction(inst->op)) { + v->samplers_used |= 1 << inst->sampler; } } - - _mesa_update_shader_textures_used(prog); } @@ -2487,34 +2341,35 @@ count_resources(struct gl_program *prog) static void check_resources(const struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog) + glsl_to_tgsi_visitor *prog, + struct gl_program *proginfo) { - switch (prog->Target) { + switch (proginfo->Target) { case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxVertexTextureImageUnits) { fail_link(shader_program, "Too many vertex shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxGeometryTextureImageUnits) { fail_link(shader_program, "Too many geometry shader texture samplers"); } - if (prog->Parameters->NumParameters > + if (proginfo->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { fail_link(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: - if 
(_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxTextureImageUnits) { fail_link(shader_program, "Too many fragment shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many fragment shader constants"); } break; @@ -3767,8 +3622,6 @@ st_translate_program( t->pointSizeOutIndex = -1; t->prevInstWrotePointSize = GL_FALSE; - /*_mesa_print_program(program);*/ - /* * Declare input attributes. */ @@ -3952,8 +3805,7 @@ st_translate_program( /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - // XXX: depends on SamplersUsed property generated by conversion to Mesa IR - if (proginfo->SamplersUsed & (1 << i)) { + if (program->samplers_used & (1 << i)) { t->samplers[i] = ureg_DECL_sampler( ureg, i ); } } @@ -4006,7 +3858,8 @@ out: /* ----------------------------- End TGSI code ------------------------------ */ /** - * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + * Convert a shader's GLSL IR into a Mesa gl_program, although without + * generating Mesa IR. 
*/ static struct gl_program * get_mesa_program(struct gl_context *ctx, @@ -4014,9 +3867,6 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader *shader) { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); - struct prog_instruction *mesa_instructions, *mesa_inst; - ir_instruction **mesa_instruction_annotation; - int i; struct gl_program *prog; GLenum target; const char *target_string; @@ -4110,90 +3960,6 @@ get_mesa_program(struct gl_context *ctx, v->merge_registers(); v->renumber_registers(); - prog->NumTemporaries = v->next_temp; - - int num_instructions = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - num_instructions++; - } - - mesa_instructions = - (struct prog_instruction *)calloc(num_instructions, - sizeof(*mesa_instructions)); - mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, - num_instructions); - - /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. - * TODO: remove - */ - mesa_inst = mesa_instructions; - i = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - - mesa_inst->Opcode = inst->op; - mesa_inst->CondUpdate = inst->cond_update; - if (inst->saturate) - mesa_inst->SaturateMode = SATURATE_ZERO_ONE; - mesa_inst->DstReg.File = inst->dst.file; - mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.CondMask = inst->dst.cond_mask; - mesa_inst->DstReg.WriteMask = inst->dst.writemask; - mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; - mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); - mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); - mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); - mesa_inst->TexSrcUnit = inst->sampler; - mesa_inst->TexSrcTarget = inst->tex_target; - mesa_inst->TexShadow = inst->tex_shadow; - mesa_instruction_annotation[i] = inst->ir; - - /* Set IndirectRegisterFiles. 
*/ - if (mesa_inst->DstReg.RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; - - /* Update program's bitmask of indirectly accessed register files */ - for (unsigned src = 0; src < 3; src++) - if (mesa_inst->SrcReg[src].RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - - switch (mesa_inst->Opcode) { - case OPCODE_BGNSUB: - inst->function->inst = i; - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_ENDSUB: - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_CAL: - mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ - break; - case OPCODE_ARL: - prog->NumAddressRegs = 1; - break; - default: - break; - } - - mesa_inst++; - i++; - - if (!shader_program->LinkStatus) - break; - } - - if (!shader_program->LinkStatus) { - free(mesa_instructions); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; - } - - set_branchtargets(v, mesa_instructions, num_instructions); - if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); printf("GLSL IR for linked %s program %d:\n", target_string, @@ -4201,25 +3967,17 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); - printf("Mesa IR for linked %s program %d:\n", target_string, - shader_program->Name); - print_program(mesa_instructions, mesa_instruction_annotation, - num_instructions); } - prog->Instructions = mesa_instructions; - prog->NumInstructions = num_instructions; + prog->Instructions = NULL; + prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(prog); + count_resources(v); - check_resources(ctx, shader_program, prog); + check_resources(ctx, shader_program, v, prog); _mesa_reference_program(ctx, &shader->Program, prog); - - if ((ctx->Shader.Flags & 
GLSL_NO_OPT) == 0) { - _mesa_optimize_program(ctx, prog); - } struct st_vertex_program *stvp; struct st_fragment_program *stfp; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index dd618424d66..6d395128295 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -186,7 +186,8 @@ st_prepare_vertex_program(struct gl_context *ctx, if (stvp->Base.IsPositionInvariant) _mesa_insert_mvp_code(ctx, &stvp->Base); - assert(stvp->Base.Base.NumInstructions > 1); + if (!stvp->glsl_to_tgsi) + assert(stvp->Base.Base.NumInstructions > 1); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -294,8 +295,11 @@ st_translate_vertex_program(struct st_context *st, st_prepare_vertex_program(st->ctx, stvp); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + if (!stvp->glsl_to_tgsi) + { + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + } ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); if (ureg == NULL) { @@ -613,7 +617,8 @@ st_translate_fragment_program(struct st_context *st, struct ureg_program *ureg; GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); - _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + if (!stfp->glsl_to_tgsi) + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) -- cgit v1.2.3 From c341d3cfd0ddbabf6274212b7f0da1a25854a673 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:03:33 -0500 Subject: glsl_to_tgsi: remove reads to output registers Fixes a regression in 0 A.D. introduced by 809a11c77073e999fd47. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index c562abc96c9..5ea03b4424e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -100,6 +100,15 @@ public: this->reladdr = NULL; } + st_src_reg(gl_register_file file, int index) + { + this->file = file; + this->index = index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + st_src_reg() { this->file = PROGRAM_UNDEFINED; @@ -346,6 +355,8 @@ public: bool process_move_condition(ir_rvalue *ir); + void remove_output_reads(gl_register_file type); + void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); int get_first_temp_write(int index); @@ -2595,6 +2606,81 @@ set_uniform_initializers(struct gl_context *ctx, ralloc_free(mem_ctx); } +/* + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING + * (for vertex shaders). + * In GLSL shaders, varying vars can be read and written. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + * + * Based on _mesa_remove_output_reads from programopt.c. 
+ */ +void +glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_PROGRAM_TEMPS); + + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); + assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->src[j].file == type) { + /* replace the read with a temp reg */ + const GLuint var = inst->src[j].index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_PROGRAM_TEMPS, + firstTemp); + firstTemp = outputMap[var] + 1; + } + inst->src[j].file = PROGRAM_TEMPORARY; + inst->src[j].index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->dst.file = PROGRAM_TEMPORARY; + inst->dst.index = outputMap[inst->dst.index]; + } + } + + /* insert new MOV instructions at the end */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (outputMap[i] >= 0) { + /* MOV VAR[i], TEMP[tmp]; */ + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + dst.index = 
i; + this->emit(NULL, OPCODE_MOV, dst, src); + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -3954,6 +4040,11 @@ get_mesa_program(struct gl_context *ctx, } #endif + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); v->eliminate_dead_code(); -- cgit v1.2.3 From 556bd82ce1227a568d69dfa0c22841986267d39f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:44:32 -0500 Subject: glsl_to_tgsi: remove a bad assertion It was triggered by Alien Arena. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5ea03b4424e..aa63539e5e8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3077,11 +3077,11 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Start looking for registers with non-overlapping usages that can be * merged together. */ - for (i=0; i < this->next_temp - 1; i++) { + for (i=0; i < this->next_temp; i++) { /* Don't touch unused registers. */ if (last_reads[i] < 0 || first_writes[i] < 0) continue; - for (j=i+1; j < this->next_temp; j++) { + for (j=0; j < this->next_temp; j++) { /* Don't touch unused registers. */ if (last_reads[j] < 0 || first_writes[j] < 0) continue; @@ -3089,8 +3089,9 @@ glsl_to_tgsi_visitor::merge_registers(void) * in the same instruction as the last read from i. Note that the * register at index i will always be used earlier or at the same time * as the register at index j. 
*/ - assert(first_writes[i] <= first_writes[j]); - if (last_reads[i] <= first_writes[j]) { + if (first_writes[i] <= first_writes[j] && + last_reads[i] <= first_writes[j]) + { rename_temp_register(j, i); /* Replace all references to j with i.*/ /* Update the first_writes and last_reads arrays with the new -- cgit v1.2.3 From 5768ed6429937940bd48f5de4f8383273952880a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 21:17:38 -0500 Subject: glsl_to_tgsi: define the sampler objects used Fixes the Nexuiz title screen and the water in 0 A.D. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aa63539e5e8..5f3f0ba295a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2328,7 +2328,7 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) * samplers, etc). 
*/ static void -count_resources(glsl_to_tgsi_visitor *v) +count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; @@ -2337,8 +2337,17 @@ count_resources(glsl_to_tgsi_visitor *v) if (_mesa_is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; + + prog->SamplerTargets[inst->sampler] = + (gl_texture_index)inst->tex_target; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << inst->sampler; + } } } + + prog->SamplersUsed = v->samplers_used; + _mesa_update_shader_textures_used(prog); } @@ -4065,7 +4074,7 @@ get_mesa_program(struct gl_context *ctx, prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(v); + count_resources(v, prog); check_resources(ctx, shader_program, v, prog); -- cgit v1.2.3 From a6705aa5ca151278ed1e596b68a327afd1405b9e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 23:17:11 -0500 Subject: glsl_to_tgsi: lower noise opcodes when converting from GLSL IR, not when generating TGSI --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f3f0ba295a..08c6a7b2dd3 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1275,12 +1275,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_noise: { - const enum prog_opcode opcode = - prog_opcode(OPCODE_NOISE1 - + (ir->operands[0]->type->vector_elements) - 1); - assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); - - emit(ir, opcode, result_dst, op[0]); + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. 
+ */ + emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } @@ -3484,13 +3485,7 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE2: case OPCODE_NOISE3: case OPCODE_NOISE4: - /* At some point, a motivated person could add a better - * implementation of noise. Currently not even the nvidia - * binary drivers do anything more than this. In any case, the - * place to do this is in the GL state tracker, not the poor - * driver. - */ - ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + assert(!"OPCODE_NOISE should have been lowered\n"); break; case OPCODE_DDY: -- cgit v1.2.3 From 3b0858f1aed83e2d90449f042d625c86ac7b93ed Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 11:55:03 -0500 Subject: glsl_to_tgsi: support DDY (ir_unop_dFdy) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 08c6a7b2dd3..eed9bb0819e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1271,6 +1271,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: + op[0].negate = ~op[0].negate; emit(ir, OPCODE_DDY, result_dst, op[0]); break; @@ -3487,12 +3488,6 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE4: assert(!"OPCODE_NOISE should have been lowered\n"); break; - - case OPCODE_DDY: - // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c - assert(!"OPCODE_DDY"); - //emit_ddy( t, dst[0], &inst->src[0] ); - break; default: ureg_insn( ureg, -- cgit v1.2.3 From 56dc2c176c3ef0d4d5abea54ff4035b062262286 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 21:49:21 -0500 Subject: glsl_to_tgsi: use TGSI opcodes when converting from GLSL IR Before, the translator used Mesa IR opcodes (a holdover from ir_to_mesa) and converted them to TGSI opcodes 
during TGSI emission. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 550 ++++++++++++----------------- 1 file changed, 217 insertions(+), 333 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index eed9bb0819e..4cb2f377e98 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -27,7 +27,7 @@ /** * \file glsl_to_tgsi.cpp * - * Translate GLSL IR to Mesa's gl_program representation and to TGSI. + * Translate GLSL IR to TGSI. */ #include @@ -63,11 +63,12 @@ extern "C" { #include "pipe/p_state.h" #include "util/u_math.h" #include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "st_context.h" #include "st_program.h" #include "st_glsl_to_tgsi.h" #include "st_mesa_to_tgsi.h" +} #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ @@ -75,7 +76,6 @@ extern "C" { (1 << PROGRAM_NAMED_PARAM) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -} class st_src_reg; class st_dst_reg; @@ -83,8 +83,7 @@ class st_dst_reg; static int swizzle_for_size(int size); /** - * This struct is a corresponding struct to Mesa prog_src_register, with - * wider fields. + * This struct is a corresponding struct to TGSI ureg_src. 
*/ class st_src_reg { public: @@ -190,7 +189,7 @@ public: return node; } - enum prog_opcode op; + unsigned op; st_dst_reg dst; st_src_reg src[3]; /** Pointer to the ir source this tree came from for debugging */ @@ -201,7 +200,7 @@ public: int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; - class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; class variable_storage : public exec_node { @@ -317,15 +316,15 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); @@ -338,13 +337,13 @@ public: st_src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - void emit_scs(ir_instruction *ir, enum prog_opcode op, + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); GLboolean try_emit_mad(ir_expression *ir, @@ -405,8 +404,29 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool 
+is_tex_instruction(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex; +} + +static unsigned +num_inst_dst_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->num_dst; +} + +static unsigned +num_inst_src_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex ? info->num_src - 1 : info->num_src; +} + glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2) { @@ -427,7 +447,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -441,7 +461,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, inst->function = NULL; - if (op == OPCODE_ARL) + if (op == TGSI_OPCODE_ARL) this->num_address_regs = 1; /* Update indirect addressing status used by TGSI */ @@ -491,14 +511,14 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { return emit(ir, op, dst, src0, src1, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { assert(dst.writemask != 0); @@ -506,7 +526,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op) 
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) { return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } @@ -516,30 +536,30 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) { - static const gl_inst_opcode dot_opcodes[] = { - OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 + static const unsigned dot_opcodes[] = { + TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** - * Emits Mesa scalar opcodes to produce unique answers across channels. + * Emits TGSI scalar opcodes to produce unique answers across channels. * - * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X + * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X * channel determines the result across all channels. So to do a vec4 * of this operation, we want to emit a scalar per source channel used * to produce dest channels. */ void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg orig_src0, st_src_reg orig_src1) { int i, j; int done_mask = ~dst.writemask; - /* Mesa RCP is a scalar operation splatting results to all channels, + /* TGSI RCP is a scalar operation splatting results to all channels, * like ARB_fp/vp. So emit as many RCPs as necessary to cover our * dst channels. 
*/ @@ -577,7 +597,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { st_src_reg undef = undef_src; @@ -588,21 +608,21 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } /** - * Emit an OPCODE_SCS instruction + * Emit an TGSI_OPCODE_SCS instruction * - * The \c SCS opcode functions a bit differently than the other Mesa (or - * ARB_fragment_program) opcodes. Instead of splatting its result across all - * four components of the destination, it writes one value to the \c x - * component and another value to the \c y component. + * The \c SCS opcode functions a bit differently than the other TGSI opcodes. + * Instead of splatting its result across all four components of the + * destination, it writes one value to the \c x component and another value to + * the \c y component. * * \param ir IR instruction being processed - * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which - * value is desired. + * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending + * on which value is desired. * \param dst Destination register * \param src Source register */ void -glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src) { @@ -613,12 +633,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, return; } - const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned component = (op == TGSI_OPCODE_SIN) ? 
0 : 1; const unsigned scs_mask = (1U << component); int done_mask = ~dst.writemask; st_src_reg tmp; - assert(op == OPCODE_SIN || op == OPCODE_COS); + assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); /* If there are compnents in the destination that differ from the component * that will be written by the SCS instrution, we'll need a temporary. @@ -661,7 +681,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, /* Emit the SCS instruction. */ - inst = emit(ir, OPCODE_SCS, tmp_dst, src0); + inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); inst->dst.writemask = scs_mask; /* Move the result of the SCS instruction to the desired location in @@ -669,12 +689,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, */ tmp.swizzle = MAKE_SWIZZLE4(component, component, component, component); - inst = emit(ir, OPCODE_SCS, dst, tmp); + inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); inst->dst.writemask = this_mask; } else { /* Emit the SCS instruction to write directly to the destination. */ - glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); + glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); inst->dst.writemask = scs_mask; } @@ -870,7 +890,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } else { st_src_reg src(PROGRAM_STATE_VAR, index, NULL); src.swizzle = slots[i].swizzle; - emit(ir, OPCODE_MOV, dst, src); + emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. 
*/ dst.index++; } @@ -903,7 +923,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete a; } - emit(NULL, OPCODE_BGNLOOP); + emit(NULL, TGSI_OPCODE_BGNLOOP); if (ir->to) { ir_expression *e = @@ -936,7 +956,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete e; } - emit(NULL, OPCODE_ENDLOOP); + emit(NULL, TGSI_OPCODE_ENDLOOP); } void @@ -944,10 +964,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(NULL, OPCODE_BRK); + emit(NULL, TGSI_OPCODE_BRK); break; case ir_loop_jump::jump_continue: - emit(NULL, OPCODE_CONT); + emit(NULL, TGSI_OPCODE_CONT); break; } } @@ -1000,7 +1020,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); return true; } @@ -1023,7 +1043,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) this->result = get_temp(ir->type); glsl_to_tgsi_instruction *inst; - inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); inst->saturate = true; return true; @@ -1036,135 +1056,18 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); + emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); *reg = temp; } (*num_reladdr)--; } -void -glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) -{ - /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. - * This means that each of the operands is either an immediate value of -1, - * 0, or 1, or is a component from one source register (possibly with - * negation). 
- */ - uint8_t components[4] = { 0 }; - bool negate[4] = { false }; - ir_variable *var = NULL; - - for (unsigned i = 0; i < ir->type->vector_elements; i++) { - ir_rvalue *op = ir->operands[i]; - - assert(op->type->is_scalar()); - - while (op != NULL) { - switch (op->ir_type) { - case ir_type_constant: { - - assert(op->type->is_scalar()); - - const ir_constant *const c = op->as_constant(); - if (c->is_one()) { - components[i] = SWIZZLE_ONE; - } else if (c->is_zero()) { - components[i] = SWIZZLE_ZERO; - } else if (c->is_negative_one()) { - components[i] = SWIZZLE_ONE; - negate[i] = true; - } else { - assert(!"SWZ constant must be 0.0 or 1.0."); - } - - op = NULL; - break; - } - - case ir_type_dereference_variable: { - ir_dereference_variable *const deref = - (ir_dereference_variable *) op; - - assert((var == NULL) || (deref->var == var)); - components[i] = SWIZZLE_X; - var = deref->var; - op = NULL; - break; - } - - case ir_type_expression: { - ir_expression *const expr = (ir_expression *) op; - - assert(expr->operation == ir_unop_neg); - negate[i] = true; - - op = expr->operands[0]; - break; - } - - case ir_type_swizzle: { - ir_swizzle *const swiz = (ir_swizzle *) op; - - components[i] = swiz->mask.x; - op = swiz->val; - break; - } - - default: - assert(!"Should not get here."); - return; - } - } - } - - assert(var != NULL); - - ir_dereference_variable *const deref = - new(mem_ctx) ir_dereference_variable(var); - - this->result.file = PROGRAM_UNDEFINED; - deref->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - ir_print_visitor v; - printf("Failed to get tree for expression operand:\n"); - deref->accept(&v); - exit(1); - } - - st_src_reg src; - - src = this->result; - src.swizzle = MAKE_SWIZZLE4(components[0], - components[1], - components[2], - components[3]); - src.negate = ((unsigned(negate[0]) << 0) - | (unsigned(negate[1]) << 1) - | (unsigned(negate[2]) << 2) - | (unsigned(negate[3]) << 3)); - - /* Storage for our result. 
Ideally for an assignment we'd be using the - * actual storage for the result here, instead. - */ - const st_src_reg result_src = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(result_src); - - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - emit(ir, OPCODE_SWZ, result_dst, src); - this->result = result_src; -} - void glsl_to_tgsi_visitor::visit(ir_expression *ir) { @@ -1173,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg result_src; st_dst_reg result_dst; - /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ if (ir->operation == ir_binop_add) { if (try_emit_mad(ir, 1)) @@ -1184,10 +1087,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (try_emit_sat(ir)) return; - if (ir->operation == ir_quadop_vector) { - this->emit_swz(ir); - return; - } + if (ir->operation == ir_quadop_vector) + assert(!"ir_quadop_vector should have been lowered"); for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; @@ -1228,51 +1129,51 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; result_src = op[0]; break; case ir_unop_abs: - emit(ir, OPCODE_ABS, result_dst, op[0]); + emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: - emit(ir, OPCODE_SSG, result_dst, op[0]); + emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); break; case ir_unop_rcp: - emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); break; case ir_unop_exp2: - 
emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); break; case ir_unop_exp: case ir_unop_log: assert(!"not reached: should be handled by ir_explog_to_explog2"); break; case ir_unop_log2: - emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); break; case ir_unop_sin: - emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos: - emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_sin_reduced: - emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos_reduced: - emit_scs(ir, OPCODE_COS, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_dFdx: - emit(ir, OPCODE_DDX, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_DDY, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); break; case ir_unop_noise: { @@ -1282,19 +1183,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * place to do this is in the GL state tracker, not the poor * driver. 
*/ - emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); break; case ir_binop_mul: - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -1303,33 +1204,33 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_binop_less: - emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); break; case ir_binop_lequal: - emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); break; case ir_binop_gequal: - emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: /* "==" operator producing a scalar boolean. 
*/ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } break; case ir_binop_any_nequal: @@ -1337,11 +1238,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; @@ -1349,22 +1250,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) assert(ir->operands[0]->type->is_vector()); emit_dp(ir, result_dst, op[0], op[0], ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_xor: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: /* This could be a saturated add and skip the SNE. 
*/ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1376,15 +1277,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_sqrt: /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); /* For incoming channels <= 0, set the result to 0. */ op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, + emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], result_src, st_src_reg_for_float(0.0)); break; case ir_unop_rsq: - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: case ir_unop_b2f: @@ -1393,36 +1294,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src = op[0]; break; case ir_unop_f2i: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, OPCODE_SNE, result_dst, + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); result_src.negate = ~result_src.negate; break; case ir_unop_floor: - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, 
result_dst, op[0]); break; case ir_unop_fract: - emit(ir, OPCODE_FRC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; case ir_binop_min: - emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); break; case ir_binop_max: - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); break; case ir_binop_pow: - emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); break; case ir_unop_bit_not: @@ -1586,7 +1487,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { index_reg = get_temp(glsl_type::float_type); - emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), this->result, st_src_reg_for_float(element_size)); } @@ -1728,9 +1629,9 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) src_ir->accept(this); - /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value OPCODE_CMP produces without an extra instruction + * choose which value TGSI_OPCODE_CMP produces without an extra instruction * computing the condition. 
*/ if (negate) @@ -1803,9 +1704,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) for (i = 0; i < type_size(ir->lhs->type); i++) { if (switch_order) { - emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); } else { - emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); } l.index++; @@ -1813,7 +1714,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } } else { for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -1849,7 +1750,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = this->result; for (i = 0; i < (unsigned int)size; i++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1870,7 +1771,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->array_elements[i]->accept(this); src = this->result; for (int j = 0; j < size; j++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1893,7 +1794,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values, ir->type->vector_elements, &src.swizzle); - emit(ir, OPCODE_MOV, mat_column, src); + emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; } @@ -2005,7 +1906,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2016,7 +1917,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) assert(!sig_iter.has_next()); /* Emit call instruction */ - call_inst = emit(ir, OPCODE_CAL); + call_inst = emit(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. 
*/ @@ -2041,7 +1942,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) st_dst_reg l = st_dst_reg(this->result); for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2061,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_src_reg result_src, coord, lod_info, projector, dx, dy; st_dst_reg result_dst, coord_dst; glsl_to_tgsi_instruction *inst = NULL; - prog_opcode opcode = OPCODE_NOP; + unsigned opcode = TGSI_OPCODE_NOP; ir->coordinate->accept(this); @@ -2072,7 +1973,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); if (ir->projector) { ir->projector->accept(this); @@ -2087,20 +1988,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: - opcode = OPCODE_TEX; + opcode = TGSI_OPCODE_TEX; break; case ir_txb: - opcode = OPCODE_TXB; + opcode = TGSI_OPCODE_TXB; ir->lod_info.bias->accept(this); lod_info = this->result; break; case ir_txl: - opcode = OPCODE_TXL; + opcode = TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; break; case ir_txd: - opcode = OPCODE_TXD; + opcode = TGSI_OPCODE_TXD; ir->lod_info.grad.dPdx->accept(this); dx = this->result; ir->lod_info.grad.dPdy->accept(this); @@ -2112,25 +2013,25 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } if (ir->projector) { - if (opcode == OPCODE_TEX) { + if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. 
*/ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, projector); + emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); coord_dst.writemask = WRITEMASK_XYZW; - opcode = OPCODE_TXP; + opcode = TGSI_OPCODE_TXP; } else { st_src_reg coord_w = coord; coord_w.swizzle = SWIZZLE_WWWW; /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by lod info. Do the + * since the last slot is taken up by LOD info. Do the * projective divide now. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_RCP, coord_dst, projector); + emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* In the case where we have to project the coordinates "by hand," - * the shadow comparitor value must also be projected. + * the shadow comparator value must also be projected. */ st_src_reg tmp_src = coord; if (ir->shadow_comparitor) { @@ -2143,42 +2044,42 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_dst_reg tmp_dst = st_dst_reg(tmp_src); tmp_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, tmp_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); tmp_dst.writemask = WRITEMASK_XY; - emit(ir, OPCODE_MOV, tmp_dst, coord); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); } coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); coord_dst.writemask = WRITEMASK_XYZW; coord.swizzle = SWIZZLE_XYZW; } } - /* If projection is done and the opcode is not OPCODE_TXP, then the shadow - * comparitor was put in the correct place (and projected) by the code, + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow + * comparator was put in the correct place (and projected) by the code, * above, that handles by-hand projection. 
*/ - if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the * coord. */ ir->shadow_comparitor->accept(this); coord_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { - /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { + /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, lod_info); + emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXD) + if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); else inst = emit(ir, opcode, result_dst, coord); @@ -2235,13 +2136,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) l = st_dst_reg(current_function->return_reg); for (i = 0; i < type_size(current_function->sig->return_type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } } - emit(ir, OPCODE_RET); + emit(ir, TGSI_OPCODE_RET); } void @@ -2252,9 +2153,9 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) if (ir->condition) { ir->condition->accept(this); this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); + emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); } else { - emit(ir, OPCODE_KIL_NV); + emit(ir, TGSI_OPCODE_KILP); } fp->UsesKill = GL_TRUE; @@ -2280,14 +2181,14 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) */ if (cond_inst == prev_inst) { st_src_reg temp = get_temp(glsl_type::bool_type); - cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + cond_inst = emit(ir->condition, 
TGSI_OPCODE_MOV, st_dst_reg(temp), result); } cond_inst->cond_update = GL_TRUE; - if_inst = emit(ir->condition, OPCODE_IF); + if_inst = emit(ir->condition, TGSI_OPCODE_IF); if_inst->dst.cond_mask = COND_NE; } else { - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); } this->instructions.push_tail(if_inst); @@ -2295,11 +2196,11 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, OPCODE_ELSE); + else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } - if_inst = emit(ir->condition, OPCODE_ENDIF); + if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -2337,7 +2238,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) foreach_iter(exec_list_iterator, iter, v->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (_mesa_is_tex_instruction(inst->op)) { + if (is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; prog->SamplerTargets[inst->sampler] = @@ -2648,7 +2549,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) /* look for instructions which read from varying vars */ foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + const GLuint numSrc = num_inst_src_regs(inst->op); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->src[j].file == type) { @@ -2687,7 +2588,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); dst.index = i; - this->emit(NULL, OPCODE_MOV, dst, src); + this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } } } @@ 
-2700,7 +2601,7 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); unsigned j; - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { inst->src[j].index = new_index; @@ -2723,17 +2624,17 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { return (depth == 0) ? i : loop_start; } } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2759,10 +2660,10 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index) return (depth == 0) ? i : loop_start; } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2784,16 +2685,16 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { last = (depth == 0) ? 
i : -2; } } - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2818,9 +2719,9 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index) if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) last = (depth == 0) ? i : -2; - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2922,18 +2823,18 @@ glsl_to_tgsi_visitor::copy_propagate(void) } switch (inst->op) { - case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: /* End of a basic block, clear the ACP entirely. */ memset(acp, 0, sizeof(*acp) * this->next_temp * 4); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: ++level; break; - case OPCODE_ENDIF: - case OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ELSE: /* Clear all channels written inside the block from the ACP, but * leaving those that were not touched. */ @@ -2946,7 +2847,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) acp[4 * r + c] = NULL; } } - if (inst->op == OPCODE_ENDIF) + if (inst->op == TGSI_OPCODE_ENDIF) --level; break; @@ -3005,7 +2906,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) } /* If this is a copy, add it to the ACP. */ - if (inst->op == OPCODE_MOV && + if (inst->op == TGSI_OPCODE_MOV && inst->dst.file == PROGRAM_TEMPORARY && !inst->dst.reladdr && !inst->saturate && @@ -3337,11 +3238,11 @@ src_register( struct st_translate *t, } /** - * Create a TGSI ureg_dst register from a Mesa dest register. + * Create a TGSI ureg_dst register from an st_dst_reg. 
*/ static struct ureg_dst translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + const st_dst_reg *dst_reg, boolean saturate ) { struct ureg_dst dst = dst_register( t, @@ -3361,7 +3262,7 @@ translate_dst( struct st_translate *t, } /** - * Create a TGSI ureg_src register from a Mesa src register. + * Create a TGSI ureg_src register from an st_src_reg. */ static struct ureg_src translate_src( struct st_translate *t, @@ -3378,12 +3279,6 @@ translate_src( struct st_translate *t, if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); -#if 0 - // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR - if (src_reg->abs) - src = ureg_abs(src); -#endif - if (src_reg->reladdr != NULL) { /* Normally ureg_src_indirect() would be used here, but a stupid compiler * bug in g++ makes ureg_src_indirect (an inline C function) erroneously @@ -3421,77 +3316,64 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = _mesa_num_inst_dst_regs( inst->op ); - num_src = _mesa_num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs( inst->op ); + num_src = num_inst_src_regs( inst->op ); if (num_dst) dst[0] = translate_dst( t, &inst->dst, - inst->saturate); // inst->SaturateMode + inst->saturate); for (i = 0; i < num_src; i++) src[i] = translate_src( t, &inst->src[i] ); switch( inst->op ) { - case OPCODE_SWZ: - // TODO: copy emit_swz function from st_mesa_to_tgsi.c - //emit_swz( t, dst[0], &inst->src[0] ); - assert(!"OPCODE_SWZ"); - return; - - case OPCODE_BGNLOOP: - case OPCODE_CAL: - case OPCODE_ELSE: - case OPCODE_ENDLOOP: - case OPCODE_IF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_IF: debug_assert(num_dst == 0); ureg_label_insn( ureg, - translate_opcode( inst->op ), + inst->op, src, num_src, get_label( t, - inst->op == OPCODE_CAL ? 
inst->function->sig_id : 0 )); + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); return; - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXD: - case OPCODE_TXL: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; ureg_tex_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, translate_texture_target( inst->tex_target, inst->tex_shadow ), src, num_src ); return; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: - assert(!"OPCODE_NOISE should have been lowered\n"); - break; - default: ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; @@ -3993,9 +3875,8 @@ get_mesa_program(struct gl_context *ctx, add_uniforms_to_parameters_list(shader_program, shader, prog); - /* Emit Mesa IR for main(). */ + /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); - v->emit(NULL, OPCODE_END); /* Now emit bodies for any functions that were used. 
*/ do { @@ -4007,18 +3888,18 @@ get_mesa_program(struct gl_context *ctx, if (!entry->bgn_inst) { v->current_function = entry; - entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); entry->bgn_inst->function = entry; visit_exec_list(&entry->sig->body, v); glsl_to_tgsi_instruction *last; last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); - if (last->op != OPCODE_RET) - v->emit(NULL, OPCODE_RET); + if (last->op != TGSI_OPCODE_RET) + v->emit(NULL, TGSI_OPCODE_RET); glsl_to_tgsi_instruction *end; - end = v->emit(NULL, OPCODE_ENDSUB); + end = v->emit(NULL, TGSI_OPCODE_ENDSUB); end->function = entry; progress = GL_TRUE; @@ -4050,6 +3931,9 @@ get_mesa_program(struct gl_context *ctx, v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); + + /* Write the END instruction. */ + v->emit(NULL, TGSI_OPCODE_END); if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); @@ -4127,8 +4011,8 @@ st_new_shader_program(struct gl_context *ctx, GLuint name) /** * Link a shader. * Called via ctx->Driver.LinkShader() - * This actually involves converting GLSL IR into Mesa gl_programs with - * code lowering and other optimizations. + * This actually involves converting GLSL IR into an intermediate TGSI-like IR + * with code lowering and other optimizations. */ GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -- cgit v1.2.3 From 16d7a717d592524e4d62fec4173cb9523f7a1453 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 2 May 2011 23:12:18 -0500 Subject: glsl_to_tgsi: fix shaders with indirect addressing of temps Fixes several Piglit tests, although it's a step backwards for optimization. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 4cb2f377e98..75ab9c5de7c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -485,7 +485,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, else { for (i=0; i<3; i++) { if(inst->src[i].reladdr) { - switch(dst.file) { + switch(inst->src[i].file) { case PROGRAM_TEMPORARY: this->indirect_addr_temps = true; break; @@ -3928,9 +3928,17 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); - v->eliminate_dead_code(); - v->merge_registers(); - v->renumber_registers(); + + /* FIXME: These passes to optimize temporary registers don't work when there + * is indirect addressing of the temporary register space. We need proper + * array support so that we don't have to give up these passes in every + * shader that uses arrays. + */ + if (!v->indirect_addr_temps) { + v->merge_registers(); + v->eliminate_dead_code(); + v->renumber_registers(); + } /* Write the END instruction. 
*/ v->emit(NULL, TGSI_OPCODE_END); -- cgit v1.2.3 From 17b695e6e7dd730497fb60a8e161935b23fa0e9c Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 5 May 2011 21:10:28 -0500 Subject: gallium: add PIPE_SHADER_CAP_INTEGERS --- src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 ++ src/gallium/drivers/i915/i915_screen.c | 2 ++ src/gallium/drivers/i965/brw_screen.c | 2 ++ src/gallium/drivers/nv50/nv50_screen.c | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 2 ++ src/gallium/drivers/nvfx/nvfx_screen.c | 2 ++ src/gallium/drivers/r300/r300_screen.c | 2 ++ src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/svga/svga_screen.c | 2 ++ src/gallium/include/pipe/p_defines.h | 1 + 10 files changed, 19 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 33f33aa82c7..6c32ccff323 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -400,6 +400,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; default: return 0; } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c86baa58b28..5b3af2519fc 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -222,6 +222,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap); return 0; diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9178dfa8f69..39e9e2fa6ac 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -243,6 +243,8 @@ brw_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shad return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 
1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: assert(0); return 0; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index cc921d08666..7e436fd47d8 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -180,6 +180,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 34bf0f0a2ad..52143981500 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -167,6 +167,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 475138c3c32..d880b12fcaa 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -174,6 +174,8 @@ nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index fae03acb6d1..93baba68150 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -256,6 +256,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case 
PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 16fe6c54a15..2d744137522 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -504,6 +504,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: return 0; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index b847cf331b3..4be10ef5821 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -286,6 +286,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; + case PIPE_SHADER_CAP_INTEGERS: + return 0; default: break; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index c0c2a7c7fd2..2c95c204e5b 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -491,6 +491,7 @@ enum pipe_shader_cap PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15, PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */ + PIPE_SHADER_CAP_INTEGERS = 17, }; -- cgit v1.2.3 From 6d89abadbcd68bbe9e08f041412549f8dc1fc73c Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 17 May 2011 17:13:20 -0500 Subject: mesa: support boolean and integer-based parameters in prog_parameter The functionality is not used by anything yet, and the glUniform functions will need to be reworked before this can reach its full usefulness. It is nonetheless a step towards integer support in the state tracker and classic drivers. 
--- src/mesa/main/ff_fragment_shader.cpp | 3 +- src/mesa/main/ffvertex_prog.c | 10 +++--- src/mesa/main/uniforms.c | 12 +++---- src/mesa/program/ir_to_mesa.cpp | 8 ++--- src/mesa/program/nvfragparse.c | 23 +++++++++----- src/mesa/program/prog_execute.c | 2 +- src/mesa/program/prog_parameter.c | 50 ++++++++++++++++-------------- src/mesa/program/prog_parameter.h | 25 ++++++++++----- src/mesa/program/prog_parameter_layout.c | 2 +- src/mesa/program/prog_print.c | 2 +- src/mesa/program/program.c | 3 +- src/mesa/program/sampler.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++--- 13 files changed, 88 insertions(+), 66 deletions(-) diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..2ccbaf8f8c3 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -875,7 +875,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, + (gl_constant_value *) values, 4, &swizzle ); r = make_ureg(PROGRAM_CONSTANT, idx); r.swz = swizzle; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index b8e49a3757f..2d2485c9e06 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p, GLfloat s2, GLfloat s3) { - GLfloat values[4]; + gl_constant_value values[4]; GLint idx; GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); ASSERT(swizzle == SWIZZLE_NOOP); diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 1c4fd82baac..07d46c6404f 100644 --- 
a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = prog->Parameters->ParameterValues[base][j]; + params[k++] = prog->Parameters->ParameterValues[base][j].f; } } } @@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLdouble) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -455,7 +455,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -468,7 +468,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -670,7 +670,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of samplers to change */ for (i = 0; i < count; i++) { GLuint sampler = (GLuint) - program->Parameters->ParameterValues[index + offset + i][0]; + program->Parameters->ParameterValues[index+offset + i][0].f; GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ @@ -936,7 +936,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program, /* Ignore writes beyond the end of (the used part of) an array */ return; } - v = program->Parameters->ParameterValues[index + offset]; + v = (GLfloat *) program->Parameters->ParameterValues[index + offset]; for (row = 0; row < rows; row++) { if (transpose) { v[row] = 
values[src + row * cols + col]; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 00869979dd8..f27492749bd 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -599,7 +599,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val) src_reg src(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + (const gl_constant_value *)&val, 1, &src.swizzle); return src; } @@ -1798,7 +1798,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) src = src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, OPCODE_MOV, mat_column, src); @@ -1836,7 +1836,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c index 8516b5fc1ff..ce72c610d89 100644 --- a/src/mesa/program/nvfragparse.c +++ b/src/mesa/program/nvfragparse.c @@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number) const GLfloat *constant; if (!Parse_Identifier(parseState, ident)) RETURN_ERROR1("Expected an identifier"); - constant = _mesa_lookup_parameter_value(parseState->parameters, - -1, (const char *) ident); + constant = (GLfloat 
*)_mesa_lookup_parameter_value(parseState->parameters, + -1, + (const char *) ident); /* XXX Check that it's a constant and not a parameter */ if (!constant) { RETURN_ERROR1("Undefined symbol"); @@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->Index = paramIndex; srcReg->File = PROGRAM_NAMED_PARAM; needSuffix = GL_FALSE; @@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already defined"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "DECLARE")) { GLubyte id[100]; @@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already declared"); } 
_mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "END")) { inst->Opcode = OPCODE_END; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e7553c69dbe..dbfd1b91875 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source, case PROGRAM_NAMED_PARAM: if (reg >= (GLint) prog->Parameters->NumParameters) return ZeroVec; - return prog->Parameters->ParameterValues[reg]; + return (GLfloat *) prog->Parameters->ParameterValues[reg]; case PROGRAM_SYSTEM_VALUE: assert(reg < Elements(machine->SystemValues)); diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 3570cab118b..b1cdf8bf2c0 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size) p->Parameters = (struct gl_program_parameter *) calloc(1, size * sizeof(struct gl_program_parameter)); - p->ParameterValues = (GLfloat (*)[4]) - _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + p->ParameterValues = (gl_constant_value (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16); if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { @@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) * \param name the parameter name, will be duplicated/copied! * \param size number of elements in 'values' vector (1..4, or more) * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. 
- * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param values initial parameter value, up to 4 gl_constant_values, or NULL * \param state state indexes, or NULL * \return index of new parameter in the list, or -1 if error (out of mem) */ GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags) { @@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, oldNum * sizeof(struct gl_program_parameter), paramList->Size * sizeof(struct gl_program_parameter)); - paramList->ParameterValues = (GLfloat (*)[4]) + paramList->ParameterValues = (gl_constant_value (*)[4]) _mesa_align_realloc(paramList->ParameterValues, /* old buf */ - oldNum * 4 * sizeof(GLfloat), /* old size */ - paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + oldNum * 4 * sizeof(gl_constant_value),/* old sz */ + paramList->Size*4*sizeof(gl_constant_value),/*new*/ 16); } @@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, return -1; } else { - GLuint i; + GLuint i, j; paramList->NumParameters = oldNum + sz4; @@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, } else { /* silence valgrind */ - ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } size -= 4; } @@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]) + const char *name, const gl_constant_value values[4]) { return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, 4, GL_NONE, values, NULL, 0x0); @@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct 
gl_program_parameter_list *paramList, */ GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size) { /* first check if this is a duplicate constant */ GLint pos; for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const GLfloat *pvals = paramList->ParameterValues[pos]; - if (pvals[0] == values[0] && - pvals[1] == values[1] && - pvals[2] == values[2] && - pvals[3] == values[3] && + const gl_constant_value *pvals = paramList->ParameterValues[pos]; + if (pvals[0].u == values[0].u && + pvals[1].u == values[1].u && + pvals[2].u == values[2].u && + pvals[3].u == values[3].u && strcmp(paramList->Parameters[pos].Name, name) == 0) { /* Same name and value is already in the param list - reuse it */ return pos; @@ -240,7 +242,7 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut) { GLint pos; @@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, struct gl_program_parameter *p = paramList->Parameters + pos; if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { /* ok, found room */ - GLfloat *pVal = paramList->ParameterValues[pos]; + gl_constant_value *pVal = paramList->ParameterValues[pos]; GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ pVal[p->Size] = values[0]; p->Size++; @@ -401,7 +403,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, * Lookup a parameter value by name in the given parameter list. * \return pointer to the float[4] values. 
*/ -GLfloat * +gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name) { @@ -465,7 +467,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, */ GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut) { GLuint i; @@ -484,7 +486,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* swizzle not allowed */ GLuint j, match = 0; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) + if (v[j].u == list->ParameterValues[i][j].u) match++; } if (match == vSize) { @@ -498,7 +500,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* look for v[0] anywhere within float[4] value */ GLuint j; for (j = 0; j < list->Parameters[i].Size; j++) { - if (list->ParameterValues[i][j] == v[0]) { + if (list->ParameterValues[i][j].u == v[0].u) { /* found it */ *posOut = i; *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); @@ -511,13 +513,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, GLuint swz[4]; GLuint match = 0, j, k; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) { + if (v[j].u == list->ParameterValues[i][j].u) { swz[j] = j; match++; } else { for (k = 0; k < list->Parameters[i].Size; k++) { - if (v[j] == list->ParameterValues[i][k]) { + if (v[j].u == list->ParameterValues[i][k].u) { swz[j] = k; match++; break; diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 10cbbe57a6c..dcc171ed745 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -46,7 +46,15 @@ #define PROG_PARAM_BIT_CYL_WRAP 0x10 /**< XXX gallium debug */ /*@}*/ - +/** + * Actual data for constant values of parameters. 
+ */ +typedef union gl_constant_value { + GLfloat f; + GLboolean b; + GLint i; + GLuint u; +} gl_constant_value; /** * Program parameter. @@ -81,7 +89,7 @@ struct gl_program_parameter_list GLuint Size; /**< allocated size of Parameters, ParameterValues */ GLuint NumParameters; /**< number of parameters in arrays */ struct gl_program_parameter *Parameters; /**< Array [Size] */ - GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */ + gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */ GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes might invalidate ParameterValues[] */ }; @@ -112,22 +120,23 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list) extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags); extern GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]); + const char *name, const gl_constant_value values[4]); extern GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size); extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut); extern GLint @@ -143,7 +152,7 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern GLfloat * +extern gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); @@ -153,7 +162,7 @@ _mesa_lookup_parameter_index(const struct 
gl_program_parameter_list *paramList, extern GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut); extern GLuint diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index 90a9771080c..28fca3b92d9 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state) switch (p->Type) { case PROGRAM_CONSTANT: { - const float *const v = + const gl_constant_value *const v = state->prog->Parameters->ParameterValues[idx]; inst->Base.SrcReg[i].Index = diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 7c3b4909e73..70412b1fa6a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f, fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; - const GLfloat *v = list->ParameterValues[i]; + const GLfloat *v = (GLfloat *) list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, _mesa_register_file_name(list->Parameters[i].Type), diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 224446a2683..4f2b6270501 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -1030,7 +1030,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - white, 4, &whiteSwizzle); + (gl_constant_value *) white, + 4, &whiteSwizzle); (void) whiteIndex; diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1457d1199fa..e8d34c670a9 100644 --- a/src/mesa/program/sampler.cpp 
+++ b/src/mesa/program/sampler.cpp @@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, index += getname.offset; - return prog->Parameters->ParameterValues[index][0]; + return prog->Parameters->ParameterValues[index][0].f; } } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 75ab9c5de7c..881b9e05de1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -706,9 +706,11 @@ struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + union gl_constant_value uval; + uval.f = val; src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + &uval, 1, &src.swizzle); return src; } @@ -1791,7 +1793,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); @@ -1829,7 +1831,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2401,7 +2403,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned @@ -3762,7 +3764,7 @@ st_translate_program( else t->constants[i] = ureg_DECL_immediate( ureg, - proginfo->Parameters->ParameterValues[i], + (GLfloat *) proginfo->Parameters->ParameterValues[i], 4 ); break; default: 
-- cgit v1.2.3 From f95169deb40f8245f4b3b07b17b222746da29bdd Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 13 Jun 2011 17:52:54 -0500 Subject: tgsi: add support for TGSI_OPCODE_MOD in tgsi_exec --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9cf74a838fe..072772eaa7e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2977,6 +2977,17 @@ micro_xor(union tgsi_exec_channel *dst, dst->u[3] = src0->u[3] ^ src1->u[3]; } +static void +micro_mod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] % src1->i[0]; + dst->i[1] = src0->i[1] % src1->i[1]; + dst->i[2] = src0->i[2] % src1->i[2]; + dst->i[3] = src0->i[3] % src1->i[3]; +} + static void micro_f2i(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -3680,7 +3691,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_XOR: -- cgit v1.2.3 From b191382c60bdcfeb7f424b23aa6ab63de81e2f08 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 13 Jun 2011 18:12:56 -0500 Subject: mesa, glsl_to_tgsi: add native support for integers in shaders Disabled by default on all drivers. To enable it, change ctx->GLSLVersion to 130 in st_extensions.c. Currently, softpipe is the only driver with integer support. 
--- src/glsl/glsl_types.h | 15 ++ src/mesa/main/uniforms.c | 38 ++-- src/mesa/program/prog_parameter.c | 28 ++- src/mesa/program/prog_parameter.h | 5 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 ++++++++++++++++++++++++----- 5 files changed, 328 insertions(+), 60 deletions(-) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 1b069df74fe..eb9d501858a 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,6 +165,21 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ + + /** + * Get the built-in instance of the vec4 type for a specific base type + */ + static const glsl_type *get_vec4_type(glsl_base_type base_type) + { + if (base_type == GLSL_TYPE_FLOAT) + return vec4_type; + else if (base_type == GLSL_TYPE_INT) + return ivec4_type; + else if (base_type == GLSL_TYPE_UINT) + return uvec4_type; + else + return NULL; + } /** diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 07d46c6404f..ce4863faf78 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? + (GLint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].i; } } } @@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+ (GLuint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].u; } } } @@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of array elements */ for (k = 0; k < count; k++) { - GLfloat *uniformVal; + gl_constant_value *uniformVal; if (offset + k >= slots) { /* Extra array data is ignored */ break; } - /* uniformVal (the destination) is always float[4] */ + /* uniformVal (the destination) is always gl_constant_value[4] */ uniformVal = program->Parameters->ParameterValues[index + offset + k]; if (basicType == GL_INT) { - /* convert user's ints to floats */ const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat) iValues[i]; + else + uniformVal[i].i = iValues[i]; } } else if (basicType == GL_UNSIGNED_INT) { - /* convert user's uints to floats */ const GLuint *iValues = ((const GLuint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat)(GLuint) iValues[i]; + else + uniformVal[i].u = iValues[i]; } } else { const GLfloat *fValues = ((const GLfloat *) values) + k * elems; assert(basicType == GL_FLOAT); for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + uniformVal[i].f = fValues[i]; } } - /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ + /* if the uniform is bool-valued, convert to 1 or 0 */ if (isUniformBool) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; + if (basicType == GL_FLOAT) + uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; + else + uniformVal[i].b = uniformVal[i].u ? 1 : 0; + + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = uniformVal[i].b ? 
1.0f : 0.0f; } } } diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index b1cdf8bf2c0..49b3ffbdd5c 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -241,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, * \return index/position of the new parameter in the parameter list. */ GLint -_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, - GLuint *swizzleOut) + GLenum datatype, GLuint *swizzleOut) { GLint pos; ASSERT(size >= 1); @@ -276,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, /* add a new parameter to store this constant */ pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, - size, GL_NONE, values, NULL, 0x0); + size, datatype, values, NULL, 0x0); if (pos >= 0 && swizzleOut) { if (size == 1) *swizzleOut = SWIZZLE_XXXX; @@ -286,6 +286,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, return pos; } +/** + * Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. 
+ * \sa _mesa_add_typed_unnamed_constant + */ +GLint +_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLuint *swizzleOut) +{ + return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE, + swizzleOut); +} + /** * Add parameter representing a varying variable. */ diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index dcc171ed745..f858cf0fa0d 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -134,6 +134,11 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, const char *name, const gl_constant_value values[4], GLuint size); +extern GLint +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut); + extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 881b9e05de1..3f5c0c60226 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -96,11 +96,13 @@ public: else this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; } - st_src_reg(gl_register_file file, int index) + st_src_reg(gl_register_file file, int index, int type) { + this->type = type; this->file = file; this->index = index; this->swizzle = SWIZZLE_XYZW; @@ -110,6 +112,7 @@ public: st_src_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->swizzle = 0; @@ -123,23 +126,26 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. 
*/ int negate; /**< NEGATE_XYZW mask from mesa */ + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; class st_dst_reg { public: - st_dst_reg(gl_register_file file, int writemask) + st_dst_reg(gl_register_file file, int writemask, int type) { this->file = file; this->index = 0; this->writemask = writemask; this->cond_mask = COND_TR; this->reladdr = NULL; + this->type = type; } st_dst_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->writemask = 0; @@ -153,12 +159,14 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ GLuint cond_mask:4; + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; st_src_reg::st_src_reg(st_dst_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->swizzle = SWIZZLE_XYZW; @@ -168,6 +176,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) st_dst_reg::st_dst_reg(st_src_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; @@ -267,6 +276,8 @@ public: int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; + + int glsl_version; variable_storage *find_variable_storage(ir_variable *var); @@ -276,6 +287,8 @@ public: void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); st_src_reg st_src_reg_for_float(float val); + st_src_reg st_src_reg_for_int(int val); + st_src_reg st_src_reg_for_type(int type, int val); /** * \name Visit methods @@ -327,6 +340,10 @@ public: glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); + + unsigned get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg 
src1); /** * Emit the correct dot-product instruction for the type of arguments @@ -343,6 +360,8 @@ public: void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); @@ -372,9 +391,9 @@ public: static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); static void fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); @@ -432,6 +451,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i; + + op = get_opcode(ir, op, dst, src0, src1); /* If we have to do relative addressing, we want to load the ARL * reg directly for one of the regs, and preload the other reladdr @@ -447,7 +468,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); + emit_arl(ir, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -531,6 +552,62 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } +/** + * Determines whether to use an integer, unsigned integer, or float opcode + * based on the operands and input opcode, then returns the selected opcode.
+ * + * TODO: type checking for remaining TGSI opcodes + */ +unsigned +glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1) +{ + int type = GLSL_TYPE_FLOAT; + + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) + type = GLSL_TYPE_FLOAT; + else if (glsl_version >= 130) + type = src0.type; + +#define case4(c, f, i, u) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ + else op = TGSI_OPCODE_##f; \ + break; +#define case3(f, i, u) case4(f, f, i, u) +#define case2fi(f, i) case4(f, f, i, i) +#define case2iu(i, u) case4(i, LAST, i, u) + + switch(op) { + case2fi(ADD, UADD); + case2fi(MUL, UMUL); + case2fi(MAD, UMAD); + case3(DIV, IDIV, UDIV); + case3(MAX, IMAX, UMAX); + case3(MIN, IMIN, UMIN); + case2iu(MOD, UMOD); + + case2fi(SEQ, USEQ); + case2fi(SNE, USNE); + case3(SGE, ISGE, USGE); + case3(SLT, ISLT, USLT); + + case2iu(SHL, SHL); + case2iu(ISHR, USHR); + case2iu(NOT, NOT); + case2iu(AND, AND); + case2iu(OR, OR); + case2iu(XOR, XOR); + + default: break; + } + + assert(op != TGSI_OPCODE_LAST); + return op; +} + void glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, @@ -607,6 +684,22 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, emit_scalar(ir, op, dst, src0, undef); } +void +glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg tmp = get_temp(glsl_type::float_type); + + if (src0.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + else if (src0.type == GLSL_TYPE_UINT) + emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + else + tmp = src0; + + emit(ir, TGSI_OPCODE_ARL, dst, tmp); +} + /** * Emit an TGSI_OPCODE_SCS instruction * @@ -705,16 +798,41 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg 
glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &uval, 1, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_FLOAT, &src.swizzle); + + return src; +} + +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_int(int val) +{ + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + union gl_constant_value uval; + + assert(glsl_version >= 130); + + uval.i = val; + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_INT, &src.swizzle); return src; } +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +{ + if (glsl_version >= 130) + return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : + st_src_reg_for_int(val); + else + return st_src_reg_for_float(val); +} + static int type_size(const struct glsl_type *type) { @@ -759,8 +877,7 @@ type_size(const struct glsl_type *type) /** * In the initial pass of codegen, we assign temporary numbers to * intermediate results. (not SSA -- variable assignments will reuse - * storage). Actual register allocation for the Mesa VM occurs in a - * pass over the Mesa IR later. + * storage). */ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) @@ -769,6 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; + src.type = type->base_type; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -875,7 +993,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->variables.push_tail(storage); this->next_temp += type_size(ir->type); - dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, + glsl_version >= 130 ? 
ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -890,7 +1009,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) assert(index == storage->index + (int)i); } } else { - st_src_reg src(PROGRAM_STATE_VAR, index, NULL); + st_src_reg src(PROGRAM_STATE_VAR, index, + glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1058,7 +1178,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); + emit_arl(ir, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); @@ -1131,13 +1251,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_neg: - op[0].negate = ~op[0].negate; - result_src = op[0]; + assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); + if (result_dst.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else { + op[0].negate = ~op[0].negate; + result_src = op[0]; + } break; case ir_unop_abs: + assert(result_dst.type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: @@ -1200,9 +1326,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + else + emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + break; case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + if (result_dst.type == 
GLSL_TYPE_FLOAT) + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + else + emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); break; case ir_binop_less: @@ -1227,7 +1360,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1239,7 +1375,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1291,17 +1430,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: case ir_unop_b2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + break; + } case ir_unop_b2i: - /* Mesa IR lacks types, ints are stored as truncated floats. */ + /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). 
*/ result_src = op[0]; break; case ir_unop_f2i: - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + if (glsl_version >= 130) + emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, - op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_trunc: emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1329,12 +1475,40 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + break; + } case ir_unop_u2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + break; + } case ir_binop_lshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + break; + } case ir_binop_rshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + break; + } case ir_binop_bit_and: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + break; + } case ir_binop_bit_xor: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + break; + } case ir_binop_bit_or: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + break; + } case ir_unop_round_even: assert(!"GLSL 1.30 features unsupported"); break; @@ -1729,7 +1903,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) { st_src_reg src; GLfloat stack_vals[4] = { 0 }; - GLfloat *values = stack_vals; + gl_constant_value *values = (gl_constant_value *) stack_vals; + GLenum gl_type = GL_NONE; unsigned int i; /* Unfortunately, 4 floats is all we can get into @@ -1737,7 +1912,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. 
*/ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -1789,13 +1963,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) for (i = 0; i < ir->type->matrix_columns; i++) { assert(ir->type->base_type == GLSL_TYPE_FLOAT); - values = &ir->value.f[i * ir->type->vector_elements]; + values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &src.swizzle); + values, + ir->type->vector_elements, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -1808,21 +1982,36 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - values = &ir->value.f[0]; + gl_type = GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + values[i].f = ir->value.f[i]; + } break; case GLSL_TYPE_UINT: + gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.u[i]; + if (glsl_version >= 130) + values[i].u = ir->value.u[i]; + else + values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: + gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.i[i]; + if (glsl_version >= 130) + values[i].i = ir->value.i[i]; + else + values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: + gl_type = glsl_version >= 130 ? 
GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.b[i]; + if (glsl_version >= 130) + values[i].b = ir->value.b[i]; + else + values[i].f = ir->value.b[i]; } break; default: @@ -1830,9 +2019,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, + this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2535,6 +2723,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) { GLuint i; GLint outputMap[VERT_RESULT_MAX]; + GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; GLboolean usedTemps[MAX_PROGRAM_TEMPS]; GLuint firstTemp = 0; @@ -2562,6 +2751,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) outputMap[var] = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS, firstTemp); + outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; } inst->src[j].file = PROGRAM_TEMPORARY; @@ -2587,8 +2777,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) for (i = 0; i < VERT_RESULT_MAX; i++) { if (outputMap[i] >= 0) { /* MOV VAR[i], TEMP[tmp]; */ - st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); - st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); dst.index = i; this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } @@ -3762,10 +3952,33 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - t->constants[i] = - ureg_DECL_immediate( ureg, - (GLfloat *) proginfo->Parameters->ParameterValues[i], - 4 ); + switch(proginfo->Parameters->Parameters[i].DataType) + { + case 
GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); + break; + default: + assert(!"should not get here"); + } break; default: break; @@ -3874,6 +4087,7 @@ get_mesa_program(struct gl_context *ctx, v->prog = prog; v->shader_program = shader_program; v->options = options; + v->glsl_version = ctx->Const.GLSLVersion; add_uniforms_to_parameters_list(shader_program, shader, prog); -- cgit v1.2.3 From b2c067e3075414703a7ebad439d4290c27cab46a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 17:38:14 -0500 Subject: glsl-to-tgsi: fix piglit tests This commit fixes all of the piglit tests regressed by "mesa, glsl_to_tgsi: add native support for integers in shaders" on softpipe. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3f5c0c60226..49613fccda7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -886,7 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; - src.type = type->base_type; + src.type = glsl_version >= 130 ? 
type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -1632,6 +1632,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); + if (glsl_version <= 120) + this->result.type = GLSL_TYPE_FLOAT; } void @@ -1966,10 +1968,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, - ir->type->vector_elements, - &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -4142,15 +4145,14 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ - v->copy_propagate(); - - /* FIXME: These passes to optimize temporary registers don't work when there + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. + * FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. 
*/ if (!v->indirect_addr_temps) { + v->copy_propagate(); v->merge_registers(); v->eliminate_dead_code(); v->renumber_registers(); -- cgit v1.2.3 From bf1cee9f24022e3da96d84fdc6baaa050d3eadf1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 18:17:40 -0500 Subject: glsl_to_tgsi: finish some loose ends --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 49613fccda7..438f21483c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2200,7 +2200,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); dy = this->result; break; - case ir_txf: // TODO: use TGSI_OPCODE_TXF here + case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ assert(!"GLSL 1.30 features unsupported"); break; } @@ -3731,6 +3731,37 @@ emit_wpos(struct st_context *st, emit_wpos_inversion(t, program, invert); } +/** + * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. + * TGSI uses +1 for front, -1 for back. + * This function converts the TGSI value to the GL value. Simply clamping/ + * saturating the value to [0,1] does the job. 
+ */ +static void +emit_face_var(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst face_temp = ureg_DECL_temporary(ureg); + struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + + /* MOV_SAT face_temp, input[face] */ + face_temp = ureg_saturate(face_temp); + ureg_MOV(ureg, face_temp, face_input); + + /* Use face_temp as face input from here on: */ + t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); +} + +static void +emit_edgeflags(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; + + ureg_MOV(ureg, edge_dst, edge_src); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -3800,15 +3831,11 @@ st_translate_program( /* Must do this after setting up t->inputs, and before * emitting constant references, below: */ - printf("FRAG_BIT_WPOS\n"); emit_wpos(st_context(ctx), t, proginfo, ureg); } - if (proginfo->InputsRead & FRAG_BIT_FACE) { - // TODO: uncomment - printf("FRAG_BIT_FACE\n"); - //emit_face_var( t, program ); - } + if (proginfo->InputsRead & FRAG_BIT_FACE) + emit_face_var(t); /* * Declare output attributes. @@ -3875,7 +3902,6 @@ st_translate_program( /* XXX: note we are modifying the incoming shader here! Need to * do this before emitting the constant decls below, or this * will be missed. - * XXX: depends on "Parameters" field specific to Mesa IR */ unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, @@ -3887,8 +3913,8 @@ st_translate_program( t->outputs[i] = psizregtemp; } } - /*if (passthrough_edgeflags) - emit_edgeflags( t, program ); */ // TODO: uncomment + if (passthrough_edgeflags) + emit_edgeflags(t); } /* Declare address register. 
-- cgit v1.2.3 From b30bbd7436bdb9727d3766ba9c07abd610e6dda8 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 14:45:03 -0500 Subject: glsl_to_tgsi: silence compiler warning --- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 75842286ba8..656c985d78f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1207,7 +1207,7 @@ st_translate_mesa_program( else t->constants[i] = ureg_DECL_immediate( ureg, - program->Parameters->ParameterValues[i], + (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: -- cgit v1.2.3 From 1141c3f4c4014e3c2834db65b96a3ba7cc78744a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 17:31:51 -0500 Subject: glsl: remove glsl_type::get_vec4_type() Thanks to Kenneth Graunke for pointing out that glsl_type::get_instance(base, 4, 1) is the same as glsl_type::get_vec4_type(base). The function was only used in st_glsl_to_tgsi, and this commit replaces that usage with get_instance. 
--- src/glsl/glsl_types.h | 15 --------------- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index eb9d501858a..1b069df74fe 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,21 +165,6 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ - - /** - * Get the built-in instance of the vec4 type for a specific base type - */ - static const glsl_type *get_vec4_type(glsl_base_type base_type) - { - if (base_type == GLSL_TYPE_FLOAT) - return vec4_type; - else if (base_type == GLSL_TYPE_INT) - return ivec4_type; - else if (base_type == GLSL_TYPE_UINT) - return uvec4_type; - else - return NULL; - } /** diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 438f21483c7..5fedf263090 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1361,7 +1361,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -1376,7 +1376,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? 
- glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); -- cgit v1.2.3 From 552cc48fca9b932fceb3d8fa7f9d0067f46b67c2 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 13:42:57 -0500 Subject: glsl_to_tgsi: fix compile error with g++ 4.6 --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5fedf263090..6c92441a105 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -389,7 +389,7 @@ public: void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -- cgit v1.2.3 From 29d21417e38aed0f0710d3692df320728aef90b1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 18:36:16 -0500 Subject: glsl_to_tgsi: implement simplify_cmp pass needed by r300g --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 95 ++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6c92441a105..322bfbbf1ab 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -374,6 +374,7 @@ public: bool process_move_condition(ir_rvalue *ir); void remove_output_reads(gl_register_file type); + void simplify_cmp(void); void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); @@ -2788,6 +2789,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } +/** + * 
Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. 
*/ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_PROGRAM_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -4170,6 +4262,9 @@ get_mesa_program(struct gl_context *ctx, v->remove_output_reads(PROGRAM_OUTPUT); if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); + + /* Perform the simplify_cmp optimization, which is required by r300g. */ + v->simplify_cmp(); /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
* FIXME: These passes to optimize temporary registers don't work when there -- cgit v1.2.3 From 8c50f18b29637470539d05ccc32b0cae0092aeac Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 21 Jun 2011 21:52:19 +0100 Subject: glsl_to_tgsi: execute merge_registers() after eliminate_dead_code() Fixes a regression unintentionally introduced by "glsl_to_tgsi: fix shaders with indirect addressing of temps" that caused missing leaves in 3dmark01 test 4 (Nature) and missing/displaced textures on human models in Counter-Strike: Source. Signed-off-by: Emil Velikov Signed-off-by: Bryan Cain --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 322bfbbf1ab..abeb44a4083 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4274,8 +4274,8 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); - v->merge_registers(); v->eliminate_dead_code(); + v->merge_registers(); v->renumber_registers(); } -- cgit v1.2.3 From 8b881ad1c3d9dd3c96afbdbb608a7240d40e9c92 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:35:36 -0500 Subject: glsl_to_tgsi: use swizzle_for_size for src reg in conditional moves This prevents the copy propagation pass from being confused by undefined channels and thus missing optimization opportunities. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index abeb44a4083..6d76686ab5d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1882,10 +1882,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_src_reg condition = this->result; for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); } l.index++; -- cgit v1.2.3 From 7ec7dd4fb6ae6c8aa29988754476e1212eb986ef Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:53:37 -0500 Subject: glsl_to_tgsi: remove handling of XPD opcode in compile_tgsi_instruction() The opcode is never emitted by the glsl_to_tgsi_visitor, so its special case in compile_tgsi_instruction() was dead code. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6d76686ab5d..721ba28d61f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3653,14 +3653,6 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src ); break; - case TGSI_OPCODE_XPD: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); - break; - default: ureg_insn( ureg, inst->op, -- cgit v1.2.3 From 41472f7809dcff114223b8fadc5b97baff6060a9 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 18:45:04 -0500 Subject: glsl_to_tgsi: add a better, more advanced dead code elimination pass --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 721ba28d61f..d47364fabb6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -208,6 +208,7 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; @@ -384,6 +385,7 @@ public: void copy_propagate(void); void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); void merge_registers(void); void renumber_registers(void); @@ -480,6 +482,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->ir = ir; + inst->dead_mask = 0; inst->function = NULL; @@ -3257,6 +3260,142 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * 
code elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_IF: + ++level; + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. 
+ */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (int i = 0; i < 4; i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. 
+ */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -4269,6 +4408,7 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); -- cgit v1.2.3 From 194732fd7299481dd57815f46a594d155260ce17 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 20:37:53 -0500 Subject: glsl_to_tgsi: use a more specific condition for gl_FragDepth hack in generating assignments This reduces the number of instructions in the fragment shader of glsl-fs-atan-2 from 174 to 146 with EmitNoIfs enabled. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d47364fabb6..5f22f7091d6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1841,7 +1841,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) if (ir->write_mask == 0) { assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. 
*/ @@ -1851,7 +1852,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int rhs_chan = 0; - assert(ir->lhs->type->is_vector()); l.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { -- cgit v1.2.3 From 3bd06e5b82b438041f50e2469be9ea68bf3b4300 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 22:32:26 -0500 Subject: glsl_to_tgsi: use the correct writemask in try_emit_mad() and try_emit_sat() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f22f7091d6..13573fc1b94 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1133,6 +1133,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; st_src_reg a, b, c; + st_dst_reg result_dst; ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) @@ -1146,7 +1147,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1168,8 +1171,10 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) st_src_reg src = this->result; this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); inst->saturate = true; return true; -- cgit v1.2.3 From 71cbc9e3c4c9ef6090ee31e87601ae64af26321e Mon Sep 17 00:00:00 2001 From: 
Bryan Cain Date: Fri, 24 Jun 2011 23:17:30 -0500 Subject: glsl_to_tgsi: improve eliminate_dead_code_advanced() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 13573fc1b94..15a1a3c51c4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3379,6 +3379,15 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } } + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + /* Now actually remove the instructions that are completely dead and update * the writemask of other instructions with dead channels. */ -- cgit v1.2.3 From f00406b68c07f97b11e873c04917cafdb1a67462 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:11:07 -0500 Subject: glsl_to_tgsi: improve assignment handling This is a hack, but it's better than emitting an unnecessary MOV instruction and hoping the optimization passes clean it up. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 15a1a3c51c4..e38617ae9fe 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -695,13 +695,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_src_reg tmp = get_temp(glsl_type::float_type); if (src0.type == GLSL_TYPE_INT) - emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); else if (src0.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); else tmp = src0; - emit(ir, TGSI_OPCODE_ARL, dst, tmp); + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); } /** @@ -1902,6 +1902,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) l.index++; r.index++; } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, change the destination register of the last instruction + * emitted as part of the expression to the assignment variable. + */ + glsl_to_tgsi_instruction *inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + inst->dst = l; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); -- cgit v1.2.3 From 4c8b6a286887628e5fc35306189a4c4a83c482ea Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:25:50 -0500 Subject: glsl_to_tgsi: fix mistake in new dead code elimination pass The conditions of IF opcodes were not being counted as reads, which sometimes led to the condition register being wrong or undefined. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e38617ae9fe..f87c64f62c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3315,10 +3315,6 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) memset(writes, 0, sizeof(*writes) * this->next_temp * 4); break; - case TGSI_OPCODE_IF: - ++level; - break; - case TGSI_OPCODE_ENDIF: --level; break; @@ -3341,6 +3337,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } break; + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + default: /* Continuing the block, clear any channels from the write array that * are read by this instruction. -- cgit v1.2.3 From 9c2810103d107d1e5ef8bd8b57819d12264f664a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:40:10 -0500 Subject: glsl_to_tgsi: always run copy_propagate() and eliminate_dead_code_advanced() These two passes are written to handle indirect addressing properly. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f87c64f62c7..e7d0af83a6b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4422,18 +4422,17 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform the simplify_cmp optimization, which is required by r300g. */ + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. 
- * FIXME: These passes to optimize temporary registers don't work when there + /* FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. */ if (!v->indirect_addr_temps) { - v->copy_propagate(); - while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); -- cgit v1.2.3 From 54db6e618e43abbd69b59e0a03e2b6ec83d3120f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 30 Jun 2011 13:42:37 -0500 Subject: r200, r600c, i965: fix build --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- src/mesa/drivers/dri/r200/r200_vertprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16 ++++++++-------- src/mesa/drivers/dri/r600/r700_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/r700_vertprog.c | 16 ++++++++-------- 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c73a8fbf02..31f76f8c939 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -605,7 +605,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. 
*/ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b6c9e5a1ceb..2fa04a15a34 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1359,7 +1359,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = 
paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All =
paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; +
r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } -- cgit v1.2.3 From 33e0c47b05c8fbae9d7af57ba65b612825b5db60 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 4 Jul 2011 08:44:12 -0500 Subject: glsl_to_tgsi: replace MAX_PROGRAM_TEMPS (256) with MAX_TEMPS (4096) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e7d0af83a6b..d7afc22c048 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -77,6 +77,8 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +#define MAX_TEMPS 4096 + class st_src_reg; class st_dst_reg; @@ -2751,11 +2753,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLboolean usedTemps[MAX_TEMPS]; GLuint firstTemp = 0; _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_PROGRAM_TEMPS); + usedTemps, MAX_TEMPS); 
assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); @@ -2775,7 +2777,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) if (outputMap[var] == -1) { numVaryingReads++; outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_PROGRAM_TEMPS, + MAX_TEMPS, firstTemp); outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; @@ -2857,7 +2859,7 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned tempWrites[MAX_TEMPS]; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; memset(tempWrites, 0, sizeof(tempWrites)); @@ -2883,7 +2885,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_PROGRAM_TEMPS); + assert(inst->dst.index < MAX_TEMPS); prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } @@ -3504,7 +3506,7 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; -- cgit v1.2.3 From c0dcab2882a4731dccd363a40c3ebcabc88b9c5d Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 8 Jul 2011 21:12:08 -0500 Subject: st/mesa, glsl_to_tgsi: support glDrawPixels/glCopyPixels with a GLSL fragment shader active Since this was previously implemented using Mesa IR and _mesa_combine_programs, this commit adds a new code path that works with glsl_to_tgsi. 
--- src/mesa/state_tracker/st_cb_drawpixels.c | 65 +++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 +++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 + 3 files changed, 194 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 965fbcd1d9e..f4dd2a42847 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,6 +94,67 @@ is_passthrough_program(const struct gl_fragment_program *prog) } +/* XXX copied verbatim from st_atom_pixeltransfer.c */ +static struct pipe_resource * +create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + + +/** + * Returns a fragment program which implements the current pixel transfer ops. 
+ */ +static struct gl_fragment_program * +get_glsl_pixel_transfer_program(struct st_context *st, + struct st_fragment_program *orig) +{ + int pixelMaps = 0, scaleAndBias = 0; + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + scaleAndBias = 1; + } + + pixelMaps = ctx->Pixel.MapColorFlag; + + if (pixelMaps) { + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_sampler_view = + st_create_texture_sampler_view(st->pipe, + st->pixel_xfer.pixelmap_texture); + } + } + + get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, + scaleAndBias, pixelMaps); + + return &fp->Base; +} + /** * Make fragment shader for glDraw/CopyPixels. 
This shader is made @@ -107,11 +168,15 @@ st_make_drawpix_fragment_program(struct st_context *st, struct gl_fragment_program **fpOut) { struct gl_program *newProg; + struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; if (is_passthrough_program(fpIn)) { newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, &st->pixel_xfer.program->Base); } + else if (stfp->glsl_to_tgsi != NULL) { + newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); + } else { #if 0 /* debug */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7afc22c048..ae0c92f5f13 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3494,6 +3494,132 @@ glsl_to_tgsi_visitor::renumber_registers(void) this->next_temp = new_index; } +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX 
instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + else if (src_regs[i].file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. 
*/ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index e21c0d1e0af..7884a9feb71 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -52,6 +52,9 @@ enum pipe_error st_translate_program( boolean passthrough_edgeflags); void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); +void get_pixel_transfer_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); -- cgit v1.2.3 From 5f0b4b0e9d376f9ec1cb5ae08c36052f4f51ac37 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:17:38 -0500 Subject: st/mesa, glsl_to_tgsi: support glBitmap with a GLSL fragment shader active --- src/mesa/state_tracker/st_cb_bitmap.c | 35 +++++++++++--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 ++ 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..f0750b518ad 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) } +static struct gl_program * +make_bitmap_fragment_program_glsl(struct st_context *st, + struct st_fragment_program *orig, + GLuint samplerIndex) +{ + struct 
gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); + return &fp->Base.Base; +} + + static int find_free_bit(uint bitfield) { @@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st, GLuint *bitmap_sampler) { struct st_fragment_program *bitmap_prog; + struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; struct gl_program *newProg; uint sampler; @@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st, * with the bitmap sampler/kill instructions. */ sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + if (stfpIn->glsl_to_tgsi) + newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); + else { + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + } #if 0 { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ae0c92f5f13..74f15087947 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3620,6 +3620,79 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, fp->glsl_to_tgsi = v; } +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. 
+ */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. 
*/ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 7884a9feb71..d877471785d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -55,6 +55,9 @@ void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); void get_pixel_transfer_visitor(struct st_fragment_program *fp, struct glsl_to_tgsi_visitor *original, int scale_and_bias, int pixel_maps); +void get_bitmap_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int samplerIndex); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); -- cgit v1.2.3 From 87f8d8547db9b947ae847c509a464e06d0ac6c64 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:36:04 -0500 Subject: glsl_to_tgsi: fix mistakes in get_pixel_transfer_visitor() I noticed these issues while working on get_bitmap_visitor(). 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 74f15087947..3df22eae918 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3534,7 +3534,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->tex_target = TEXTURE_2D_INDEX; prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); - prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3593,6 +3592,9 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); st_src_reg src_regs[3]; + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT && @@ -3603,8 +3605,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, } else if (src_regs[i].file == PROGRAM_INPUT) prog->InputsRead |= (1 << src_regs[i].index); - else if (src_regs[i].file == PROGRAM_OUTPUT) - prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); -- cgit v1.2.3 From 7732822c833ee22e259af3f8bd2bfb57c986612e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 15:49:26 -0500 Subject: glsl_to_tgsi: separate immediates from array constants during IR translation Before, if any uniform or constant array was accessed with indirect addressing, st_translate_program() would emit uniform constants in the place of immediates. This behavior was unavoidable with ir_to_mesa/mesa_to_tgsi, but glsl_to_tgsi can work around it since the GLSL IR backend and the TGSI emission are both inside the state tracker. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 143 +++++++++++++++++++---------- 1 file changed, 95 insertions(+), 48 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3df22eae918..389e5d8e2ef 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -70,6 +70,7 @@ extern "C" { #include "st_mesa_to_tgsi.h" } +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ (1 << PROGRAM_STATE_VAR) | \ @@ -272,6 +273,7 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; + struct gl_program_parameter_list *immediates; int next_temp; @@ -505,6 +507,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -524,6 +529,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -804,12 +812,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_FLOAT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_FLOAT, &src.swizzle); return src; } @@ -817,14 +825,14 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { 
- st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_INT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_INT, &src.swizzle); return src; } @@ -1933,9 +1941,15 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; + gl_register_file file; + gl_program_parameter_list *param_list; + static int in_array = 0; + + file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; + param_list = in_array ? this->prog->Parameters : this->immediates; /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an + * _mesa_add_typed_unnamed_constant. So, make a temp to store an * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. 
*/ @@ -1969,6 +1983,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) int size = type_size(ir->type->fields.array); assert(size > 0); + in_array++; for (i = 0; i < ir->type->length; i++) { ir->array_elements[i]->accept(this); @@ -1981,6 +1996,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } } this->result = temp_base; + in_array--; return; } @@ -1992,8 +2008,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(ir->type->base_type == GLSL_TYPE_FLOAT); values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + src = st_src_reg(file, -1, ir->type->base_type); + src.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, GL_FLOAT, @@ -2007,7 +2023,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) return; } - src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: gl_type = GL_FLOAT; @@ -2046,8 +2061,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(!"Non-float/uint/int/bool constant"); } - this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + this->result = st_src_reg(file, -1, ir->type); + this->result.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2430,11 +2445,13 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; + immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3521,6 +3538,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; 
v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* * Get initial pixel color from the texture. @@ -3648,6 +3667,8 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3707,6 +3728,7 @@ struct st_translate { struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; + struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[1]; @@ -3797,6 +3819,43 @@ static void set_insn_start( struct st_translate *t, t->insn[t->insn_count++] = start; } +/** + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 
+ */ +static struct ureg_src +emit_immediate( struct st_translate *t, + struct gl_program_parameter_list *params, + int index) +{ + struct ureg_program *ureg = t->ureg; + + switch(params->Parameters[index].DataType) + { + case GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + /** * Map a Mesa dst register to a TGSI ureg_dst register. */ @@ -3871,6 +3930,9 @@ src_register( struct st_translate *t, else return t->constants[index]; + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + case PROGRAM_INPUT: assert(t->inputMapping[index] < Elements(t->inputs)); return t->inputs[t->inputMapping[index]]; @@ -4402,9 +4464,8 @@ st_translate_program( } } - /* Emit constants and immediates. Mesa uses a single index space - * for these, so we put all the translated regs in t->constants. - * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); @@ -4423,49 +4484,34 @@ st_translate_program( t->constants[i] = ureg_DECL_constant( ureg, i ); break; - /* Emit immediates only when there's no indirect addressing of - * the const buffer. 
- * FIXME: Be smarter and recognize param arrays: - * indirect addressing is only valid within the referenced - * array. - */ + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - switch(proginfo->Parameters->Parameters[i].DataType) - { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); - break; - default: - assert(!"should not get here"); - } + t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); break; default: break; } } } + + /* Emit immediate values. 
+ */ + t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + for (i = 0; i < program->immediates->NumParameters; i++) { + t->immediates[i] = emit_immediate( t, program->immediates, i ); + } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { @@ -4512,6 +4558,7 @@ out: FREE(t->insn); FREE(t->labels); FREE(t->constants); + FREE(t->immediates); if (t->error) { debug_printf("%s: translate error flag set\n", __FUNCTION__); -- cgit v1.2.3 From 0da994a9f15b461d16cf88ce16dc07e98dfada6f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 16:29:56 -0500 Subject: glsl_to_tgsi: make assignment hack safer Fixes an assertion failure in piglit test glsl-texcoord-array. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 389e5d8e2ef..6e01a44a733 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1917,12 +1917,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && type_size(ir->lhs->type) == 1) { /* To avoid emitting an extra MOV when assigning an expression to a - * variable, change the destination register of the last instruction - * emitted as part of the expression to the assignment variable. + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. 
*/ glsl_to_tgsi_instruction *inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - inst->dst = l; + emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); -- cgit v1.2.3 From a2c3b9f38d81f363bd62abc87dc3abef2beeba95 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:23:26 -0500 Subject: glsl_to_tgsi: make coding style more consistent --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 263 ++++++++++++++--------------- 1 file changed, 126 insertions(+), 137 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6e01a44a733..952900a1fb5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3778,15 +3778,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { * of labels built here and patch the TGSI code with the actual * location of each label. */ -static unsigned *get_label( struct st_translate *t, - unsigned branch_target ) +static unsigned *get_label(struct st_translate *t, unsigned branch_target) { unsigned i; if (t->labels_count + 1 >= t->labels_size) { t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); t->labels = (struct label *)realloc(t->labels, - t->labels_size * sizeof t->labels[0]); + t->labels_size * sizeof(struct label)); if (t->labels == NULL) { static unsigned dummy; t->error = TRUE; @@ -3805,12 +3804,11 @@ static unsigned *get_label( struct st_translate *t, * Update the insn[] array so the next Mesa instruction points to * the next TGSI instruction. 
*/ -static void set_insn_start( struct st_translate *t, - unsigned start ) +static void set_insn_start(struct st_translate *t, unsigned start) { if (t->insn_count + 1 >= t->insn_size) { t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); - t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); if (t->insn == NULL) { t->error = TRUE; return; @@ -3824,9 +3822,9 @@ static void set_insn_start( struct st_translate *t, * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ static struct ureg_src -emit_immediate( struct st_translate *t, - struct gl_program_parameter_list *params, - int index) +emit_immediate(struct st_translate *t, + struct gl_program_parameter_list *params, + int index) { struct ureg_program *ureg = t->ureg; @@ -3861,17 +3859,17 @@ emit_immediate( struct st_translate *t, * Map a Mesa dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return t->temps[index]; @@ -3894,7 +3892,7 @@ dst_register( struct st_translate *t, return t->address[index]; default: - debug_assert( 0 ); + assert(!"unknown dst register file"); return ureg_dst_undef(); } } @@ -3903,11 +3901,11 @@ dst_register( struct st_translate *t, * Map a Mesa src register to a TGSI ureg_src register. 
*/ static struct ureg_src -src_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); @@ -3915,7 +3913,7 @@ src_register( struct st_translate *t, assert(index >= 0); assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: @@ -3927,7 +3925,7 @@ src_register( struct st_translate *t, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ if (index < 0) - return ureg_DECL_constant( t->ureg, 0 ); + return ureg_DECL_constant(t->ureg, 0); else return t->constants[index]; @@ -3950,7 +3948,7 @@ src_register( struct st_translate *t, return t->systemValues[index]; default: - debug_assert( 0 ); + assert(!"unknown src register file"); return ureg_src_undef(); } } @@ -3959,22 +3957,21 @@ src_register( struct st_translate *t, * Create a TGSI ureg_dst register from an st_dst_reg. */ static struct ureg_dst -translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, - boolean saturate ) +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) { - struct ureg_dst dst = dst_register( t, - dst_reg->file, - dst_reg->index ); + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); - dst = ureg_writemask( dst, - dst_reg->writemask ); + dst = ureg_writemask(dst, dst_reg->writemask); if (saturate) - dst = ureg_saturate( dst ); + dst = ureg_saturate(dst); if (dst_reg->reladdr != NULL) - dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); return dst; } @@ -3983,16 +3980,15 @@ translate_dst( struct st_translate *t, * Create a TGSI ureg_src register from an st_src_reg. 
*/ static struct ureg_src -translate_src( struct st_translate *t, - const st_src_reg *src_reg ) +translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); - src = ureg_swizzle( src, - GET_SWZ( src_reg->swizzle, 0 ) & 0x3, - GET_SWZ( src_reg->swizzle, 1 ) & 0x3, - GET_SWZ( src_reg->swizzle, 2 ) & 0x3, - GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -4024,8 +4020,8 @@ translate_src( struct st_translate *t, } static void -compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -4034,29 +4030,29 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = num_inst_dst_regs( inst->op ); - num_src = num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); if (num_dst) - dst[0] = translate_dst( t, - &inst->dst, - inst->saturate); + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); for (i = 0; i < num_src; i++) - src[i] = translate_src( t, &inst->src[i] ); + src[i] = translate_src(t, &inst->src[i]); - switch( inst->op ) { + switch(inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_CAL: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: - debug_assert(num_dst == 0); - ureg_label_insn( ureg, - inst->op, - src, num_src, - get_label( t, - inst->op == TGSI_OPCODE_CAL ? 
inst->function->sig_id : 0 )); + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); return; case TGSI_OPCODE_TEX: @@ -4065,27 +4061,23 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; - ureg_tex_insn( ureg, - inst->op, - dst, num_dst, - translate_texture_target( inst->tex_target, - inst->tex_shadow ), - src, num_src ); + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); return; case TGSI_OPCODE_SCS: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); break; default: - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); break; } } @@ -4095,9 +4087,9 @@ compile_tgsi_instruction(struct st_translate *t, * Basically, add (adjX, adjY) to the fragment position. */ static void -emit_adjusted_wpos( struct st_translate *t, - const struct gl_program *program, - GLfloat adjX, GLfloat adjY) +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) { struct ureg_program *ureg = t->ureg; struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); @@ -4119,9 +4111,9 @@ emit_adjusted_wpos( struct st_translate *t, * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 
*/ static void -emit_wpos_inversion( struct st_translate *t, - const struct gl_program *program, - boolean invert) +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) { struct ureg_program *ureg = t->ureg; @@ -4140,7 +4132,7 @@ emit_wpos_inversion( struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); struct ureg_dst wpos_temp; struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; @@ -4149,26 +4141,26 @@ emit_wpos_inversion( struct st_translate *t, if (wpos_input.File == TGSI_FILE_TEMPORARY) wpos_temp = ureg_dst(wpos_input); else { - wpos_temp = ureg_DECL_temporary( ureg ); - ureg_MOV( ureg, wpos_temp, wpos_input ); + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); } if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4312,7 +4304,7 @@ st_translate_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags ) + boolean passthrough_edgeflags) { struct st_translate translate, *t; unsigned i; 
@@ -4358,27 +4350,24 @@ st_translate_program( for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_POSITION, /* Z / Depth */ - outputSemanticIndex[i] ); - - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Z ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); break; case TGSI_SEMANTIC_STENCIL: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_STENCIL, /* Stencil */ - outputSemanticIndex[i] ); - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Y ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); break; case TGSI_SEMANTIC_COLOR: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); break; default: - debug_assert(0); + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); return PIPE_ERROR_BAD_INPUT; } } @@ -4392,9 +4381,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); } } else { @@ -4405,9 +4394,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { /* Writing to the point size result register requires special * handling to implement clamping. 
@@ -4421,8 +4410,8 @@ st_translate_program( unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); - t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); t->pointSizeResult = t->outputs[i]; t->pointSizeOutIndex = i; t->outputs[i] = psizregtemp; @@ -4435,8 +4424,8 @@ st_translate_program( /* Declare address register. */ if (program->num_address_regs > 0) { - debug_assert( program->num_address_regs == 1 ); - t->address[0] = ureg_DECL_address( ureg ); + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); } /* Declare misc input registers @@ -4461,7 +4450,7 @@ st_translate_program( */ for (i = 0; i < (unsigned)program->next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary( t->ureg ); + t->temps[i] = ureg_DECL_temporary(t->ureg); } } @@ -4469,7 +4458,7 @@ st_translate_program( * so we put all the translated regs in t->constants. 
*/ if (proginfo->Parameters) { - t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -4482,7 +4471,7 @@ st_translate_program( case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); break; /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect @@ -4493,9 +4482,9 @@ st_translate_program( */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); + t->constants[i] = emit_immediate(t, proginfo->Parameters, i); break; default: break; @@ -4505,27 +4494,28 @@ st_translate_program( /* Emit immediate values. 
*/ - t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } for (i = 0; i < program->immediates->NumParameters; i++) { - t->immediates[i] = emit_immediate( t, program->immediates, i ); + assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); + t->immediates[i] = emit_immediate(t, program->immediates, i); } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { - t->samplers[i] = ureg_DECL_sampler( ureg, i ); + t->samplers[i] = ureg_DECL_sampler(ureg, i); } } /* Emit each instruction in turn: */ foreach_iter(exec_list_iterator, iter, program->instructions) { - set_insn_start( t, ureg_get_instruction_number( ureg )); - compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); if (t->prevInstWrotePointSize && proginfo->Id) { /* The previous instruction wrote to the (fake) vertex point size @@ -4535,14 +4525,14 @@ st_translate_program( * Note that we can't do this easily at the end of program due to * possible early return. 
*/ - set_insn_start( t, ureg_get_instruction_number( ureg )); - ureg_MAX( t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); } t->prevInstWrotePointSize = GL_FALSE; } @@ -4550,9 +4540,8 @@ st_translate_program( /* Fix up all emitted labels: */ for (i = 0; i < t->labels_count; i++) { - ureg_fixup_label( ureg, - t->labels[i].token, - t->insn[t->labels[i].branch_target] ); + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); } out: @@ -4582,7 +4571,7 @@ get_mesa_program(struct gl_context *ctx, struct gl_program *prog; GLenum target; const char *target_string; - GLboolean progress; + bool progress; struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; -- cgit v1.2.3 From f751730ad003bb19ce85bc4d0abddaf40edde6c1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:24:42 -0500 Subject: glsl_to_tgsi: update comments --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 952900a1fb5..3a69a439822 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -236,7 +236,7 @@ public: /** * 
identifier of this function signature used by the program. * - * At the point that Mesa instructions for function calls are + * At the point that TGSI instructions for function calls are * generated, we don't know the address of the first instruction of * the function body. So we make the BranchTarget that is called a * small integer and rewrite them during set_branchtargets(). @@ -251,10 +251,9 @@ public: glsl_to_tgsi_instruction *bgn_inst; /** - * Index of the first instruction of the function body in actual - * Mesa IR. + * Index of the first instruction of the function body in actual TGSI. * - * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + * Set after conversion from glsl_to_tgsi_instruction to TGSI. */ int inst; @@ -1672,7 +1671,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register + * base of the array and an index that offsets the TGSI register * index. */ ir->array_index->accept(this); @@ -1879,7 +1878,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just + * present on the RHS while TGSI treats write_mask as just * showing which channels of the vec4 RHS get written. */ for (int i = 0; i < 4; i++) { @@ -2202,8 +2201,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 
*/ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); @@ -3799,9 +3798,9 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target) } /** - * Called prior to emitting the TGSI code for each Mesa instruction. + * Called prior to emitting the TGSI code for each instruction. * Allocate additional space for instructions if needed. - * Update the insn[] array so the next Mesa instruction points to + * Update the insn[] array so the next glsl_to_tgsi_instruction points to * the next TGSI instruction. */ static void set_insn_start(struct st_translate *t, unsigned start) @@ -3856,7 +3855,7 @@ emit_immediate(struct st_translate *t, } /** - * Map a Mesa dst register to a TGSI ureg_dst register. + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst dst_register(struct st_translate *t, @@ -3898,7 +3897,7 @@ dst_register(struct st_translate *t, } /** - * Map a Mesa src register to a TGSI ureg_src register. + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. 
*/ static struct ureg_src src_register(struct st_translate *t, -- cgit v1.2.3 From 3354a5b56398f90fc36ab14b6444aae27b50e859 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:20:19 -0500 Subject: glsl_to_tgsi: rework immediate tracking to not use gl_program_parameter_list --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 +++++++++++++++++++---------- 1 file changed, 88 insertions(+), 47 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3a69a439822..6039488f26b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -229,6 +229,20 @@ public: ir_variable *var; /* variable that maps to this, if any */ }; +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + class function_entry : public exec_node { public: ir_function_signature *sig; @@ -272,7 +286,6 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; - struct gl_program_parameter_list *immediates; int next_temp; @@ -285,6 +298,9 @@ public: variable_storage *find_variable_storage(ir_variable *var); + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + function_entry *get_function_signature(ir_function_signature *sig); st_src_reg get_temp(const glsl_type *type); @@ -326,6 +342,10 @@ public: /** List of variable_storage */ exec_list variables; + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + /** List of function_entry */ exec_list function_signatures; int next_signature_id; @@ -808,6 
+828,42 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, } } +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + fprintf(stderr, "adding immediate\n"); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { @@ -815,8 +871,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_FLOAT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); return src; } @@ -830,8 +885,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_INT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); return src; } @@ -1941,12 +1995,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; - gl_register_file file; - 
gl_program_parameter_list *param_list; static int in_array = 0; - - file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; - param_list = in_array ? this->prog->Parameters : this->immediates; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* Unfortunately, 4 floats is all we can get into * _mesa_add_typed_unnamed_constant. So, make a temp to store an @@ -2009,11 +2059,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(param_list, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -2062,9 +2112,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(file, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(param_list, - values, ir->type->vector_elements, gl_type, - &this->result.swizzle); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); } function_entry * @@ -2441,17 +2493,16 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() result.file = PROGRAM_UNDEFINED; next_temp = 1; next_signature_id = 1; + num_immediates = 0; current_function = NULL; num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; - immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { - _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3538,8 +3589,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - 
_mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* * Get initial pixel color from the texture. @@ -3667,8 +3717,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3822,32 +3871,20 @@ static void set_insn_start(struct st_translate *t, unsigned start) */ static struct ureg_src emit_immediate(struct st_translate *t, - struct gl_program_parameter_list *params, - int index) + gl_constant_value values[4], + int type, int size) { struct ureg_program *ureg = t->ureg; - switch(params->Parameters[index].DataType) + switch(type) { case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); default: assert(!"should not get here - type must be float, int, uint, 
or bool"); return ureg_src_undef(); @@ -4483,7 +4520,10 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate(t, proginfo->Parameters, i); + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); break; default: break; @@ -4493,14 +4533,15 @@ st_translate_program( /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } - for (i = 0; i < program->immediates->NumParameters; i++) { - assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); - t->immediates[i] = emit_immediate(t, program->immediates, i); + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } /* texture samplers */ -- cgit v1.2.3 From 10d31cb307f90a08fafed5c67945ffe53d279940 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:45:16 -0500 Subject: glsl_to_tgsi: lower all ir_quadop_vector expressions Unlike Mesa IR, TGSI doesn't have a SWZ opcode. 
--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6039488f26b..0cbfc943a05 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4825,7 +4825,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; - progress = lower_quadop_vector(ir, true) || progress; + progress = lower_quadop_vector(ir, false) || progress; if (options->EmitNoIfs) { progress = lower_discard(ir) || progress; -- cgit v1.2.3 From 3e7fce9773ec332665326a785b6ed1fcf5bd578e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:36:10 -0500 Subject: glsl_to_tgsi: add each relative address to the previous This is a glsl_to_tgsi port of commit d6e1a8f71437. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0cbfc943a05..f66e240a177 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1741,6 +1741,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) this->result, st_src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. 
+ */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } -- cgit v1.2.3 From 189e9f12c7d3a82d7dd28695935a83e4319bb267 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:39:40 -0500 Subject: glsl_to_tgsi: copy reladdr in st_src_reg(st_dst_reg) constructor This is a glsl_to_tgsi port of commit f7cd9a858c04. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f66e240a177..ba4074eecd5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -174,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } st_dst_reg::st_dst_reg(st_src_reg reg) -- cgit v1.2.3 From b7e89115310628310bf458a33f2df2bf23384cf3 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 2 Aug 2011 11:36:44 -0500 Subject: glsl_to_tgsi: remove debugging printf --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ba4074eecd5..b5f4253ea64 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -840,7 +840,6 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, int index = 0; immediate_storage *entry; assert(file == PROGRAM_IMMEDIATE); - fprintf(stderr, "adding immediate\n"); /* Search immediate storage to see if we already have an identical * immediate that we can use instead of adding a duplicate entry. 
-- cgit v1.2.3 From 9adcab9cd464d659288e31e6767efb5dee3894ff Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 4 Aug 2011 10:15:54 -0500 Subject: st/mesa: replace duplicated create_color_map_texture() function with shared function --- src/mesa/state_tracker/st_atom_pixeltransfer.c | 22 +--------------------- src/mesa/state_tracker/st_cb_drawpixels.c | 23 +---------------------- src/mesa/state_tracker/st_texture.c | 20 ++++++++++++++++++++ src/mesa/state_tracker/st_texture.h | 4 ++++ 4 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 95b706cb96c..12b5bc5ba79 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key) } -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. 
*/ @@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f4dd2a42847..0c4dc23ccf7 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,27 +94,6 @@ is_passthrough_program(const struct gl_fragment_program *prog) } -/* XXX copied verbatim from st_atom_pixeltransfer.c */ -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Returns a fragment program which implements the current pixel transfer ops. 
*/ @@ -142,7 +121,7 @@ get_glsl_pixel_transfer_program(struct st_context *st, if (pixelMaps) { /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ffe7e256a56..d8ba3ac9252 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe, } } + +struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d50c3c9af79..b822f47cf9e 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face); + +extern struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx); + #endif -- cgit v1.2.3