diff options
Diffstat (limited to 'src/mesa/program/ir_to_mesa.cpp')
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 3171 |
1 files changed, 0 insertions, 3171 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp deleted file mode 100644 index 8c2dc521443..00000000000 --- a/src/mesa/program/ir_to_mesa.cpp +++ /dev/null @@ -1,3171 +0,0 @@ -/* - * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. - * Copyright (C) 2008 VMware, Inc. All Rights Reserved. - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_to_mesa.cpp - * - * Translate GLSL IR to Mesa's gl_program representation. - */ - -#include <stdio.h> -#include "main/macros.h" -#include "main/mtypes.h" -#include "main/shaderapi.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "main/glspirv.h" -#include "compiler/glsl/ast.h" -#include "compiler/glsl/ir.h" -#include "compiler/glsl/ir_expression_flattening.h" -#include "compiler/glsl/ir_visitor.h" -#include "compiler/glsl/ir_optimization.h" -#include "compiler/glsl/ir_uniform.h" -#include "compiler/glsl/glsl_parser_extras.h" -#include "compiler/glsl_types.h" -#include "compiler/glsl/linker.h" -#include "compiler/glsl/program.h" -#include "compiler/glsl/shader_cache.h" -#include "compiler/glsl/string_to_uint_map.h" -#include "program/prog_instruction.h" -#include "program/prog_optimize.h" -#include "program/prog_print.h" -#include "program/program.h" -#include "program/prog_parameter.h" - - -static int swizzle_for_size(int size); - -namespace { - -class src_reg; -class dst_reg; - -/** - * This struct is a corresponding struct to Mesa prog_src_register, with - * wider fields. - */ -class src_reg { -public: - src_reg(gl_register_file file, int index, const glsl_type *type) - { - this->file = file; - this->index = index; - if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) - this->swizzle = swizzle_for_size(type->vector_elements); - else - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->reladdr = NULL; - } - - src_reg() - { - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->swizzle = 0; - this->negate = 0; - this->reladdr = NULL; - } - - explicit src_reg(dst_reg reg); - - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ - int negate; /**< NEGATE_XYZW mask from mesa */ - /** Register index should be offset by the integer in this reg. */ - src_reg *reladdr; -}; - -class dst_reg { -public: - dst_reg(gl_register_file file, int writemask) - { - this->file = file; - this->index = 0; - this->writemask = writemask; - this->reladdr = NULL; - } - - dst_reg() - { - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->writemask = 0; - this->reladdr = NULL; - } - - explicit dst_reg(src_reg reg); - - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - /** Register index should be offset by the integer in this reg. */ - src_reg *reladdr; -}; - -} /* anonymous namespace */ - -src_reg::src_reg(dst_reg reg) -{ - this->file = reg.file; - this->index = reg.index; - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->reladdr = reg.reladdr; -} - -dst_reg::dst_reg(src_reg reg) -{ - this->file = reg.file; - this->index = reg.index; - this->writemask = WRITEMASK_XYZW; - this->reladdr = reg.reladdr; -} - -namespace { - -class ir_to_mesa_instruction : public exec_node { -public: - DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) - - enum prog_opcode op; - dst_reg dst; - src_reg src[3]; - /** Pointer to the ir source this tree came from for debugging */ - ir_instruction *ir; - bool saturate; - int sampler; /**< sampler index */ - int tex_target; /**< One of TEXTURE_*_INDEX */ - GLboolean tex_shadow; -}; - -class variable_storage : public exec_node { -public: - variable_storage(ir_variable *var, gl_register_file file, int index) - : file(file), index(index), var(var) - { - /* empty */ - } - - gl_register_file file; - int index; - ir_variable *var; /* variable that maps to this, if any */ -}; - -class function_entry : public exec_node { -public: - ir_function_signature *sig; - - /** - * identifier of this function signature used by the program. - * - * At the point that Mesa instructions for function calls are - * generated, we don't know the address of the first instruction of - * the function body. So we make the BranchTarget that is called a - * small integer and rewrite them during set_branchtargets(). - */ - int sig_id; - - /** - * Pointer to first instruction of the function body. - * - * Set during function body emits after main() is processed. - */ - ir_to_mesa_instruction *bgn_inst; - - /** - * Index of the first instruction of the function body in actual - * Mesa IR. - * - * Set after convertion from ir_to_mesa_instruction to prog_instruction. - */ - int inst; - - /** Storage for the return value. */ - src_reg return_reg; -}; - -class ir_to_mesa_visitor : public ir_visitor { -public: - ir_to_mesa_visitor(); - ~ir_to_mesa_visitor(); - - function_entry *current_function; - - struct gl_context *ctx; - struct gl_program *prog; - struct gl_shader_program *shader_program; - struct gl_shader_compiler_options *options; - - int next_temp; - - variable_storage *find_variable_storage(const ir_variable *var); - - src_reg get_temp(const glsl_type *type); - void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); - - src_reg src_reg_for_float(float val); - - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. - */ - /*@{*/ - virtual void visit(ir_variable *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); - virtual void visit(ir_discard *); - virtual void visit(ir_demote *); - virtual void visit(ir_texture *); - virtual void visit(ir_if *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_barrier *); - /*@}*/ - - src_reg result; - - /** List of variable_storage */ - exec_list variables; - - /** List of function_entry */ - exec_list function_signatures; - int next_signature_id; - - /** List of ir_to_mesa_instruction */ - exec_list instructions; - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg src0, src_reg src1, src_reg src2); - - /** - * Emit the correct dot-product instruction for the type of arguments - */ - ir_to_mesa_instruction * emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); - - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0); - - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1); - - bool try_emit_mad(ir_expression *ir, - int mul_operand); - bool try_emit_mad_for_and_not(ir_expression *ir, - int mul_operand); - - void emit_swz(ir_expression *ir); - - void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, - dst_reg dst, - const src_reg &src0, const src_reg &src1); - - inline void emit_sne(ir_expression *ir, dst_reg dst, - const src_reg &src0, const src_reg &src1) - { - emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); - } - - inline void emit_seq(ir_expression *ir, dst_reg dst, - const src_reg &src0, const src_reg &src1) - { - emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); - } - - bool process_move_condition(ir_rvalue *ir); - - void copy_propagate(void); - - void *mem_ctx; -}; - -} /* anonymous namespace */ - -static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); - -static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); - -static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); - -static int -swizzle_for_size(int size) -{ - static const int size_swizzles[4] = { - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), - }; - - assert((size >= 1) && (size <= 4)); - return size_swizzles[size - 1]; -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg src0, src_reg src1, src_reg src2) -{ - ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); - int num_reladdr = 0; - - /* If we have to do relative addressing, we want to load the ARL - * reg directly for one of the regs, and preload the other reladdr - * sources into temps. - */ - num_reladdr += dst.reladdr != NULL; - num_reladdr += src0.reladdr != NULL; - num_reladdr += src1.reladdr != NULL; - num_reladdr += src2.reladdr != NULL; - - reladdr_to_temp(ir, &src2, &num_reladdr); - reladdr_to_temp(ir, &src1, &num_reladdr); - reladdr_to_temp(ir, &src0, &num_reladdr); - - if (dst.reladdr) { - emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); - num_reladdr--; - } - assert(num_reladdr == 0); - - inst->op = op; - inst->dst = dst; - inst->src[0] = src0; - inst->src[1] = src1; - inst->src[2] = src2; - inst->ir = ir; - - this->instructions.push_tail(inst); - - return inst; -} - - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1) -{ - return emit(ir, op, dst, src0, src1, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0) -{ - assert(dst.writemask != 0); - return emit(ir, op, dst, src0, undef_src, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) -{ - return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit_dp(ir_instruction *ir, - dst_reg dst, src_reg src0, src_reg src1, - unsigned elements) -{ - static const enum prog_opcode dot_opcodes[] = { - OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 - }; - - return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); -} - -/** - * Emits Mesa scalar opcodes to produce unique answers across channels. - * - * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X - * channel determines the result across all channels. So to do a vec4 - * of this operation, we want to emit a scalar per source channel used - * to produce dest channels. - */ -void -ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg orig_src0, src_reg orig_src1) -{ - int i, j; - int done_mask = ~dst.writemask; - - /* Mesa RCP is a scalar operation splatting results to all channels, - * like ARB_fp/vp. So emit as many RCPs as necessary to cover our - * dst channels. - */ - for (i = 0; i < 4; i++) { - GLuint this_mask = (1 << i); - ir_to_mesa_instruction *inst; - src_reg src0 = orig_src0; - src_reg src1 = orig_src1; - - if (done_mask & this_mask) - continue; - - GLuint src0_swiz = GET_SWZ(src0.swizzle, i); - GLuint src1_swiz = GET_SWZ(src1.swizzle, i); - for (j = i + 1; j < 4; j++) { - /* If there is another enabled component in the destination that is - * derived from the same inputs, generate its value on this pass as - * well. - */ - if (!(done_mask & (1 << j)) && - GET_SWZ(src0.swizzle, j) == src0_swiz && - GET_SWZ(src1.swizzle, j) == src1_swiz) { - this_mask |= (1 << j); - } - } - src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, - src0_swiz, src0_swiz); - src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, - src1_swiz, src1_swiz); - - inst = emit(ir, op, dst, src0, src1); - inst->dst.writemask = this_mask; - done_mask |= this_mask; - } -} - -void -ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0) -{ - src_reg undef = undef_src; - - undef.swizzle = SWIZZLE_XXXX; - - emit_scalar(ir, op, dst, src0, undef); -} - -src_reg -ir_to_mesa_visitor::src_reg_for_float(float val) -{ - src_reg src(PROGRAM_CONSTANT, -1, NULL); - - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (const gl_constant_value *)&val, 1, &src.swizzle); - - return src; -} - -static int -type_size(const struct glsl_type *type) -{ - return type->count_vec4_slots(false, false); -} - -/** - * In the initial pass of codegen, we assign temporary numbers to - * intermediate results. (not SSA -- variable assignments will reuse - * storage). Actual register allocation for the Mesa VM occurs in a - * pass over the Mesa IR later. - */ -src_reg -ir_to_mesa_visitor::get_temp(const glsl_type *type) -{ - src_reg src; - - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; - src.reladdr = NULL; - next_temp += type_size(type); - - if (type->is_array() || type->is_struct()) { - src.swizzle = SWIZZLE_NOOP; - } else { - src.swizzle = swizzle_for_size(type->vector_elements); - } - src.negate = 0; - - return src; -} - -variable_storage * -ir_to_mesa_visitor::find_variable_storage(const ir_variable *var) -{ - foreach_in_list(variable_storage, entry, &this->variables) { - if (entry->var == var) - return entry; - } - - return NULL; -} - -void -ir_to_mesa_visitor::visit(ir_variable *ir) -{ - if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { - unsigned int i; - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - /* Check if this statevar's setup in the STATE file exactly - * matches how we'll want to reference it as a - * struct/array/whatever. If not, then we need to move it into - * temporary storage and hope that it'll get copy-propagated - * out. - */ - for (i = 0; i < ir->get_num_state_slots(); i++) { - if (slots[i].swizzle != SWIZZLE_XYZW) { - break; - } - } - - variable_storage *storage; - dst_reg dst; - if (i == ir->get_num_state_slots()) { - /* We'll set the index later. */ - storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - this->variables.push_tail(storage); - - dst = undef_dst; - } else { - /* The variable_storage constructor allocates slots based on the size - * of the type. However, this had better match the number of state - * elements that we're going to copy into the new temporary. - */ - assert((int) ir->get_num_state_slots() == type_size(ir->type)); - - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - this->next_temp += type_size(ir->type); - - dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); - } - - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - int index = _mesa_add_state_reference(this->prog->Parameters, - slots[i].tokens); - - if (storage->file == PROGRAM_STATE_VAR) { - if (storage->index == -1) { - storage->index = index; - } else { - assert(index == storage->index + (int)i); - } - } else { - src_reg src(PROGRAM_STATE_VAR, index, NULL); - src.swizzle = slots[i].swizzle; - emit(ir, OPCODE_MOV, dst, src); - /* even a float takes up a whole vec4 reg in a struct/array. */ - dst.index++; - } - } - - if (storage->file == PROGRAM_TEMPORARY && - dst.index != storage->index + (int) ir->get_num_state_slots()) { - linker_error(this->shader_program, - "failed to load builtin uniform `%s' " - "(%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); - } - } -} - -void -ir_to_mesa_visitor::visit(ir_loop *ir) -{ - emit(NULL, OPCODE_BGNLOOP); - - visit_exec_list(&ir->body_instructions, this); - - emit(NULL, OPCODE_ENDLOOP); -} - -void -ir_to_mesa_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(NULL, OPCODE_BRK); - break; - case ir_loop_jump::jump_continue: - emit(NULL, OPCODE_CONT); - break; - } -} - - -void -ir_to_mesa_visitor::visit(ir_function_signature *ir) -{ - assert(0); - (void)ir; -} - -void -ir_to_mesa_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir, &sig->body) { - ir->accept(this); - } - } -} - -bool -ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) -{ - int nonmul_operand = 1 - mul_operand; - src_reg a, b, c; - - ir_expression *expr = ir->operands[mul_operand]->as_expression(); - if (!expr || expr->operation != ir_binop_mul) - return false; - - expr->operands[0]->accept(this); - a = this->result; - expr->operands[1]->accept(this); - b = this->result; - ir->operands[nonmul_operand]->accept(this); - c = this->result; - - this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); - - return true; -} - -/** - * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) - * - * The logic values are 1.0 for true and 0.0 for false. Logical-and is - * implemented using multiplication, and logical-or is implemented using - * addition. Logical-not can be implemented as (true - x), or (1.0 - x). - * As result, the logical expression (a & !b) can be rewritten as: - * - * - a * !b - * - a * (1 - b) - * - (a * 1) - (a * b) - * - a + -(a * b) - * - a + (a * -b) - * - * This final expression can be implemented as a single MAD(a, -b, a) - * instruction. - */ -bool -ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) -{ - const int other_operand = 1 - try_operand; - src_reg a, b; - - ir_expression *expr = ir->operands[try_operand]->as_expression(); - if (!expr || expr->operation != ir_unop_logic_not) - return false; - - ir->operands[other_operand]->accept(this); - a = this->result; - expr->operands[0]->accept(this); - b = this->result; - - b.negate = ~b.negate; - - this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); - - return true; -} - -void -ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, - src_reg *reg, int *num_reladdr) -{ - if (!reg->reladdr) - return; - - emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); - - if (*num_reladdr != 1) { - src_reg temp = get_temp(glsl_type::vec4_type); - - emit(ir, OPCODE_MOV, dst_reg(temp), *reg); - *reg = temp; - } - - (*num_reladdr)--; -} - -void -ir_to_mesa_visitor::emit_swz(ir_expression *ir) -{ - /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. - * This means that each of the operands is either an immediate value of -1, - * 0, or 1, or is a component from one source register (possibly with - * negation). - */ - uint8_t components[4] = { 0 }; - bool negate[4] = { false }; - ir_variable *var = NULL; - - for (unsigned i = 0; i < ir->type->vector_elements; i++) { - ir_rvalue *op = ir->operands[i]; - - assert(op->type->is_scalar()); - - while (op != NULL) { - switch (op->ir_type) { - case ir_type_constant: { - - assert(op->type->is_scalar()); - - const ir_constant *const c = op->as_constant(); - if (c->is_one()) { - components[i] = SWIZZLE_ONE; - } else if (c->is_zero()) { - components[i] = SWIZZLE_ZERO; - } else if (c->is_negative_one()) { - components[i] = SWIZZLE_ONE; - negate[i] = true; - } else { - assert(!"SWZ constant must be 0.0 or 1.0."); - } - - op = NULL; - break; - } - - case ir_type_dereference_variable: { - ir_dereference_variable *const deref = - (ir_dereference_variable *) op; - - assert((var == NULL) || (deref->var == var)); - components[i] = SWIZZLE_X; - var = deref->var; - op = NULL; - break; - } - - case ir_type_expression: { - ir_expression *const expr = (ir_expression *) op; - - assert(expr->operation == ir_unop_neg); - negate[i] = true; - - op = expr->operands[0]; - break; - } - - case ir_type_swizzle: { - ir_swizzle *const swiz = (ir_swizzle *) op; - - components[i] = swiz->mask.x; - op = swiz->val; - break; - } - - default: - assert(!"Should not get here."); - return; - } - } - } - - assert(var != NULL); - - ir_dereference_variable *const deref = - new(mem_ctx) ir_dereference_variable(var); - - this->result.file = PROGRAM_UNDEFINED; - deref->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - printf("Failed to get tree for expression operand:\n"); - deref->print(); - printf("\n"); - exit(1); - } - - src_reg src; - - src = this->result; - src.swizzle = MAKE_SWIZZLE4(components[0], - components[1], - components[2], - components[3]); - src.negate = ((unsigned(negate[0]) << 0) - | (unsigned(negate[1]) << 1) - | (unsigned(negate[2]) << 2) - | (unsigned(negate[3]) << 3)); - - /* Storage for our result. Ideally for an assignment we'd be using the - * actual storage for the result here, instead. - */ - const src_reg result_src = get_temp(ir->type); - dst_reg result_dst = dst_reg(result_src); - - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - emit(ir, OPCODE_SWZ, result_dst, src); - this->result = result_src; -} - -void -ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, - enum prog_opcode op, - dst_reg dst, - const src_reg &src0, - const src_reg &src1) -{ - src_reg difference; - src_reg abs_difference = get_temp(glsl_type::vec4_type); - const src_reg zero = src_reg_for_float(0.0); - - /* x == y is equivalent to -abs(x-y) >= 0. Since all of the code that - * consumes the generated IR is pretty dumb, take special care when one - * of the operands is zero. - * - * Similarly, x != y is equivalent to -abs(x-y) < 0. - */ - if (src0.file == zero.file && - src0.index == zero.index && - src0.swizzle == zero.swizzle) { - difference = src1; - } else if (src1.file == zero.file && - src1.index == zero.index && - src1.swizzle == zero.swizzle) { - difference = src0; - } else { - difference = get_temp(glsl_type::vec4_type); - - src_reg tmp_src = src0; - tmp_src.negate = ~tmp_src.negate; - - emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); - } - - emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); - - abs_difference.negate = ~abs_difference.negate; - emit(ir, op, dst, abs_difference, zero); -} - -void -ir_to_mesa_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - src_reg op[ARRAY_SIZE(ir->operands)]; - src_reg result_src; - dst_reg result_dst; - - /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) - */ - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir, 1)) - return; - if (try_emit_mad(ir, 0)) - return; - } - - /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) - */ - if (ir->operation == ir_binop_logic_and) { - if (try_emit_mad_for_and_not(ir, 1)) - return; - if (try_emit_mad_for_and_not(ir, 0)) - return; - } - - if (ir->operation == ir_quadop_vector) { - this->emit_swz(ir); - return; - } - - for (operand = 0; operand < ir->num_operands; operand++) { - this->result.file = PROGRAM_UNDEFINED; - ir->operands[operand]->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - printf("Failed to get tree for expression operand:\n"); - ir->operands[operand]->print(); - printf("\n"); - exit(1); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - } - - int vector_elements = ir->operands[0]->type->vector_elements; - if (ir->operands[1]) { - vector_elements = MAX2(vector_elements, - ir->operands[1]->type->vector_elements); - } - - this->result.file = PROGRAM_UNDEFINED; - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(ir->type); - /* convenience for the emit functions below. */ - result_dst = dst_reg(result_src); - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - switch (ir->operation) { - case ir_unop_logic_not: - /* Previously 'SEQ dst, src, 0.0' was used for this. However, many - * older GPUs implement SEQ using multiple instructions (i915 uses two - * SGE instructions and a MUL instruction). Since our logic values are - * 0.0 and 1.0, 1-x also implements !x. - */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); - break; - case ir_unop_neg: - op[0].negate = ~op[0].negate; - result_src = op[0]; - break; - case ir_unop_abs: - emit(ir, OPCODE_ABS, result_dst, op[0]); - break; - case ir_unop_sign: - emit(ir, OPCODE_SSG, result_dst, op[0]); - break; - case ir_unop_rcp: - emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); - break; - - case ir_unop_exp2: - emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); - break; - case ir_unop_exp: - assert(!"not reached: should be handled by exp_to_exp2"); - break; - case ir_unop_log: - assert(!"not reached: should be handled by log_to_log2"); - break; - case ir_unop_log2: - emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); - break; - case ir_unop_sin: - emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos: - emit_scalar(ir, OPCODE_COS, result_dst, op[0]); - break; - - case ir_unop_dFdx: - emit(ir, OPCODE_DDX, result_dst, op[0]); - break; - case ir_unop_dFdy: - emit(ir, OPCODE_DDY, result_dst, op[0]); - break; - - case ir_unop_saturate: { - ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, - result_dst, op[0]); - inst->saturate = true; - break; - } - - case ir_binop_add: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - break; - case ir_binop_sub: - emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); - break; - - case ir_binop_mul: - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); - break; - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ - assert(ir->type->is_integer_32()); - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - - case ir_binop_less: - emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); - break; - case ir_binop_gequal: - emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); - break; - case ir_binop_equal: - emit_seq(ir, result_dst, op[0], op[1]); - break; - case ir_binop_nequal: - emit_sne(ir, result_dst, op[0], op[1]); - break; - case ir_binop_all_equal: - /* "==" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - src_reg temp = get_temp(glsl_type::vec4_type); - emit_sne(ir, dst_reg(temp), op[0], op[1]); - - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero becomes 1.0, and positive values become zero. - */ - emit_dp(ir, result_dst, temp, temp, vector_elements); - - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero becomes 1.0, and negative values become zero. This - * achieved using SGE. - */ - src_reg sge_src = result_src; - sge_src.negate = ~sge_src.negate; - emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); - } else { - emit_seq(ir, result_dst, op[0], op[1]); - } - break; - case ir_binop_any_nequal: - /* "!=" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - src_reg temp = get_temp(glsl_type::vec4_type); - if (ir->operands[0]->type->is_boolean() && - ir->operands[1]->as_constant() && - ir->operands[1]->as_constant()->is_zero()) { - temp = op[0]; - } else { - emit_sne(ir, dst_reg(temp), op[0], op[1]); - } - - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - ir_to_mesa_instruction *const dp = - emit_dp(ir, result_dst, temp, temp, vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * achieved using SLT. - */ - src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); - } - } else { - emit_sne(ir, result_dst, op[0], op[1]); - } - break; - - case ir_binop_logic_xor: - emit_sne(ir, result_dst, op[0], op[1]); - break; - - case ir_binop_logic_or: { - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* After the addition, the value will be an integer on the - * range [0,2]. Zero stays zero, and positive values become 1.0. - */ - ir_to_mesa_instruction *add = - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - add->saturate = true; - } else { - /* The Boolean arguments are stored as float 0.0 and 1.0. If either - * value is 1.0, the result of the logcal-or should be 1.0. If both - * values are 0.0, the result should be 0.0. This is exactly what - * MAX does. - */ - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); - } - break; - } - - case ir_binop_logic_and: - /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - - case ir_binop_dot: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - emit_dp(ir, result_dst, op[0], op[1], - ir->operands[0]->type->vector_elements); - break; - - case ir_unop_sqrt: - /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); - /* For incoming channels <= 0, set the result to 0. */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, - op[0], result_src, src_reg_for_float(0.0)); - break; - case ir_unop_rsq: - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - break; - case ir_unop_i2f: - case ir_unop_u2f: - case ir_unop_b2f: - case ir_unop_b2i: - case ir_unop_i2u: - case ir_unop_u2i: - /* Mesa IR lacks types, ints are stored as truncated floats. */ - result_src = op[0]; - break; - case ir_unop_f2i: - case ir_unop_f2u: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_f2b: - case ir_unop_i2b: - emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); - break; - case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway - case ir_unop_bitcast_f2u: - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - break; - case ir_unop_trunc: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_ceil: - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_FLR, result_dst, op[0]); - result_src.negate = ~result_src.negate; - break; - case ir_unop_floor: - emit(ir, OPCODE_FLR, result_dst, op[0]); - break; - case ir_unop_fract: - emit(ir, OPCODE_FRC, result_dst, op[0]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_pack_half_2x16: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_unop_unpack_double_2x32: - case ir_unop_bitfield_reverse: - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - assert(!"not supported"); - break; - case ir_binop_min: - emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); - break; - case ir_binop_max: - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); - break; - case ir_binop_pow: - emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); - break; - - /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since - * hardware backends have no way to avoid Mesa IR generation - * even if they don't use it, we need to emit "something" and - * continue. - */ - case ir_binop_lshift: - case ir_binop_rshift: - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - break; - - case ir_unop_bit_not: - case ir_unop_round_even: - emit(ir, OPCODE_MOV, result_dst, op[0]); - break; - - case ir_binop_ubo_load: - assert(!"not supported"); - break; - - case ir_triop_lrp: - /* ir_triop_lrp operands are (x, y, a) while - * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. - */ - emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); - break; - - case ir_triop_csel: - /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP - * selects src1 if src0 is < 0, src2 otherwise. - */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); - break; - - case ir_binop_vector_extract: - case ir_triop_fma: - case ir_triop_bitfield_extract: - case ir_triop_vector_insert: - case ir_quadop_bitfield_insert: - case ir_binop_ldexp: - case ir_binop_carry: - case ir_binop_borrow: - case ir_binop_abs_sub: - case ir_binop_add_sat: - case ir_binop_sub_sat: - case ir_binop_avg: - case ir_binop_avg_round: - case ir_binop_mul_32x16: - case ir_binop_imul_high: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - case ir_unop_subroutine_to_int: - case ir_unop_get_buffer_size: - case ir_unop_bitcast_u642d: - case ir_unop_bitcast_i642d: - case ir_unop_bitcast_d2u64: - case ir_unop_bitcast_d2i64: - case ir_unop_i642i: - case ir_unop_u642i: - case ir_unop_i642u: - case ir_unop_u642u: - case ir_unop_i642b: - case ir_unop_i642f: - case ir_unop_u642f: - case ir_unop_i642d: - case ir_unop_u642d: - case ir_unop_i2i64: - case ir_unop_u2i64: - case ir_unop_b2i64: - case ir_unop_f2i64: - case ir_unop_d2i64: - case ir_unop_i2u64: - case ir_unop_u2u64: - case ir_unop_f2u64: - case ir_unop_d2u64: - case ir_unop_u642i64: - case ir_unop_i642u64: - case ir_unop_pack_int_2x32: - case ir_unop_unpack_int_2x32: - case ir_unop_pack_uint_2x32: - case ir_unop_unpack_uint_2x32: - case ir_unop_pack_sampler_2x32: - case ir_unop_unpack_sampler_2x32: - case ir_unop_pack_image_2x32: - case ir_unop_unpack_image_2x32: - case ir_unop_atan: - case ir_binop_atan2: - case ir_unop_clz: - case ir_unop_f162f: - case ir_unop_f2f16: - case ir_unop_f2fmp: - case ir_unop_f162b: - case ir_unop_b2f16: - case ir_unop_i2i: - case ir_unop_i2imp: - case ir_unop_u2u: - case ir_unop_u2ump: - assert(!"not supported"); - break; - - case ir_unop_ssbo_unsized_array_length: - case ir_unop_implicitly_sized_array_length: - case ir_quadop_vector: - /* This operation should have already been handled. - */ - assert(!"Should not get here."); - break; - } - - this->result = result_src; -} - - -void -ir_to_mesa_visitor::visit(ir_swizzle *ir) -{ - src_reg src; - int i; - int swizzle[4] = {0}; - - /* Note that this is only swizzles in expressions, not those on the left - * hand side of an assignment, which do write masking. See ir_assignment - * for that. - */ - - ir->val->accept(this); - src = this->result; - assert(src.file != PROGRAM_UNDEFINED); - assert(ir->type->vector_elements > 0); - - for (i = 0; i < 4; i++) { - if (i < ir->type->vector_elements) { - switch (i) { - case 0: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); - break; - case 1: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); - break; - case 2: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); - break; - case 3: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); - break; - } - } else { - /* If the type is smaller than a vec4, replicate the last - * channel out. - */ - swizzle[i] = swizzle[ir->type->vector_elements - 1]; - } - } - - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - - this->result = src; -} - -void -ir_to_mesa_visitor::visit(ir_dereference_variable *ir) -{ - variable_storage *entry = find_variable_storage(ir->var); - ir_variable *var = ir->var; - - if (!entry) { - switch (var->data.mode) { - case ir_var_uniform: - entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, - var->data.param_index); - this->variables.push_tail(entry); - break; - case ir_var_shader_in: - /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), - * user-assigned generic attributes (glBindVertexLocation), - * and user-defined varyings. - */ - assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); - break; - case ir_var_shader_out: - assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location); - break; - case ir_var_system_value: - entry = new(mem_ctx) variable_storage(var, - PROGRAM_SYSTEM_VALUE, - var->data.location); - break; - case ir_var_auto: - case ir_var_temporary: - entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(entry); - - next_temp += type_size(var->type); - break; - } - - if (!entry) { - printf("Failed to make storage for %s\n", var->name); - exit(1); - } - } - - this->result = src_reg(entry->file, entry->index, var->type); -} - -void -ir_to_mesa_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *index; - src_reg src; - int element_size = type_size(ir->type); - - index = ir->array_index->constant_expression_value(ralloc_parent(ir)); - - ir->array->accept(this); - src = this->result; - - if (index) { - src.index += index->value.i[0] * element_size; - } else { - /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register - * index. - */ - ir->array_index->accept(this); - - src_reg index_reg; - - if (element_size == 1) { - index_reg = this->result; - } else { - index_reg = get_temp(glsl_type::float_type); - - emit(ir, OPCODE_MUL, dst_reg(index_reg), - this->result, src_reg_for_float(element_size)); - } - - /* If there was already a relative address register involved, add the - * new and the old together to get the new offset. - */ - if (src.reladdr != NULL) { - src_reg accum_reg = get_temp(glsl_type::float_type); - - emit(ir, OPCODE_ADD, dst_reg(accum_reg), - index_reg, *src.reladdr); - - index_reg = accum_reg; - } - - src.reladdr = ralloc(mem_ctx, src_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - src.swizzle = swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = SWIZZLE_NOOP; - - this->result = src; -} - -void -ir_to_mesa_visitor::visit(ir_dereference_record *ir) -{ - unsigned int i; - const glsl_type *struct_type = ir->record->type; - int offset = 0; - - ir->record->accept(this); - - assert(ir->field_idx >= 0); - for (i = 0; i < struct_type->length; i++) { - if (i == (unsigned) ir->field_idx) - break; - offset += type_size(struct_type->fields.structure[i].type); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - this->result.swizzle = swizzle_for_size(ir->type->vector_elements); - else - this->result.swizzle = SWIZZLE_NOOP; - - this->result.index += offset; -} - -/** - * We want to be careful in assignment setup to hit the actual storage - * instead of potentially using a temporary like we might with the - * ir_dereference handler. - */ -static dst_reg -get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) -{ - /* The LHS must be a dereference. If the LHS is a variable indexed array - * access of a vector, it must be separated into a series conditional moves - * before reaching this point (see ir_vec_index_to_cond_assign). - */ - assert(ir->as_dereference()); - ir_dereference_array *deref_array = ir->as_dereference_array(); - if (deref_array) { - assert(!deref_array->array->type->is_vector()); - } - - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. - */ - ir->accept(v); - return dst_reg(v->result); -} - -/* Calculate the sampler index and also calculate the base uniform location - * for struct members. - */ -static void -calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, - unsigned *offset, unsigned *array_elements, - unsigned *location) -{ - if (deref->ir_type == ir_type_dereference_variable) - return; - - switch (deref->ir_type) { - case ir_type_dereference_array: { - ir_dereference_array *deref_arr = deref->as_dereference_array(); - - void *mem_ctx = ralloc_parent(deref_arr); - ir_constant *array_index = - deref_arr->array_index->constant_expression_value(mem_ctx); - - if (!array_index) { - /* GLSL 1.10 and 1.20 allowed variable sampler array indices, - * while GLSL 1.30 requires that the array indices be - * constant integer expressions. We don't expect any driver - * to actually work with a really variable array index, so - * all that would work would be an unrolled loop counter that ends - * up being constant above. - */ - ralloc_strcat(&prog->data->InfoLog, - "warning: Variable sampler array index unsupported.\n" - "This feature of the language was removed in GLSL 1.20 " - "and is unlikely to be supported for 1.10 in Mesa.\n"); - } else { - *offset += array_index->value.u[0] * *array_elements; - } - - *array_elements *= deref_arr->array->type->length; - - calc_sampler_offsets(prog, deref_arr->array->as_dereference(), - offset, array_elements, location); - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *deref_record = deref->as_dereference_record(); - unsigned field_index = deref_record->field_idx; - *location += - deref_record->record->type->struct_location_offset(field_index); - calc_sampler_offsets(prog, deref_record->record->as_dereference(), - offset, array_elements, location); - break; - } - - default: - unreachable("Invalid deref type"); - break; - } -} - -static int -get_sampler_uniform_value(class ir_dereference *sampler, - struct gl_shader_program *shader_program, - const struct gl_program *prog) -{ - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - ir_variable *var = sampler->variable_referenced(); - unsigned location = var->data.location; - unsigned array_elements = 1; - unsigned offset = 0; - - calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, - &location); - - assert(shader_program->data->UniformStorage[location].opaque[shader].active); - return shader_program->data->UniformStorage[location].opaque[shader].index + - offset; -} - -/** - * Process the condition of a conditional assignment - * - * Examines the condition of a conditional assignment to generate the optimal - * first operand of a \c CMP instruction. If the condition is a relational - * operator with 0 (e.g., \c ir_binop_less), the value being compared will be - * used as the source for the \c CMP instruction. Otherwise the comparison - * is processed to a boolean result, and the boolean result is used as the - * operand to the CMP instruction. - */ -bool -ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) -{ - ir_rvalue *src_ir = ir; - bool negate = true; - bool switch_order = false; - - ir_expression *const expr = ir->as_expression(); - if ((expr != NULL) && (expr->num_operands == 2)) { - bool zero_on_left = false; - - if (expr->operands[0]->is_zero()) { - src_ir = expr->operands[1]; - zero_on_left = true; - } else if (expr->operands[1]->is_zero()) { - src_ir = expr->operands[0]; - zero_on_left = false; - } - - /* a is - 0 + - 0 + - * (a < 0) T F F ( a < 0) T F F - * (0 < a) F F T (-a < 0) F F T - * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) - * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) - * - * Note that exchanging the order of 0 and 'a' in the comparison simply - * means that the value of 'a' should be negated. - */ - if (src_ir != ir) { - switch (expr->operation) { - case ir_binop_less: - switch_order = false; - negate = zero_on_left; - break; - - case ir_binop_gequal: - switch_order = true; - negate = zero_on_left; - break; - - default: - /* This isn't the right kind of comparison afterall, so make sure - * the whole condition is visited. - */ - src_ir = ir; - break; - } - } - } - - src_ir->accept(this); - - /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the - * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value OPCODE_CMP produces without an extra instruction - * computing the condition. - */ - if (negate) - this->result.negate = ~this->result.negate; - - return switch_order; -} - -void -ir_to_mesa_visitor::visit(ir_assignment *ir) -{ - dst_reg l; - src_reg r; - int i; - - ir->rhs->accept(this); - r = this->result; - - l = get_assignment_lhs(ir->lhs, this); - - /* FINISHME: This should really set to the correct maximal writemask for each - * FINISHME: component written (in the loops below). This case can only - * FINISHME: occur for matrices, arrays, and structures. - */ - if (ir->write_mask == 0) { - assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { - /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the - * FINISHME: W component of fragment shader output zero, work correctly. - */ - l.writemask = WRITEMASK_XYZW; - } else { - int swizzles[4]; - int first_enabled_chan = 0; - int rhs_chan = 0; - - assert(ir->lhs->type->is_vector()); - l.writemask = ir->write_mask; - - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) { - first_enabled_chan = GET_SWZ(r.swizzle, i); - break; - } - } - - /* Swizzle a small RHS vector into the channels being written. - * - * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just - * showing which channels of the vec4 RHS get written. - */ - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) - swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); - else - swizzles[i] = first_enabled_chan; - } - r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], - swizzles[2], swizzles[3]); - } - - assert(l.file != PROGRAM_UNDEFINED); - assert(r.file != PROGRAM_UNDEFINED); - - if (ir->condition) { - const bool switch_order = this->process_move_condition(ir->condition); - src_reg condition = this->result; - - for (i = 0; i < type_size(ir->lhs->type); i++) { - if (switch_order) { - emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); - } else { - emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); - } - - l.index++; - r.index++; - } - } else { - for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, OPCODE_MOV, l, r); - l.index++; - r.index++; - } - } -} - - -void -ir_to_mesa_visitor::visit(ir_constant *ir) -{ - src_reg src; - GLfloat stack_vals[4] = { 0 }; - GLfloat *values = stack_vals; - unsigned int i; - - /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an - * aggregate constant and move each constant value into it. If we - * get lucky, copy propagation will eliminate the extra moves. - */ - - if (ir->type->is_struct()) { - src_reg temp_base = get_temp(ir->type); - dst_reg temp = dst_reg(temp_base); - - for (i = 0; i < ir->type->length; i++) { - ir_constant *const field_value = ir->get_record_field(i); - int size = type_size(field_value->type); - - assert(size > 0); - - field_value->accept(this); - src = this->result; - - for (unsigned j = 0; j < (unsigned int)size; j++) { - emit(ir, OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - return; - } - - if (ir->type->is_array()) { - src_reg temp_base = get_temp(ir->type); - dst_reg temp = dst_reg(temp_base); - int size = type_size(ir->type->fields.array); - - assert(size > 0); - - for (i = 0; i < ir->type->length; i++) { - ir->const_elements[i]->accept(this); - src = this->result; - for (int j = 0; j < size; j++) { - emit(ir, OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - return; - } - - if (ir->type->is_matrix()) { - src_reg mat = get_temp(ir->type); - dst_reg mat_column = dst_reg(mat); - - for (i = 0; i < ir->type->matrix_columns; i++) { - assert(ir->type->is_float()); - values = &ir->value.f[i * ir->type->vector_elements]; - - src = src_reg(PROGRAM_CONSTANT, -1, NULL); - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &src.swizzle); - emit(ir, OPCODE_MOV, mat_column, src); - - mat_column.index++; - } - - this->result = mat; - return; - } - - src.file = PROGRAM_CONSTANT; - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - values = &ir->value.f[0]; - break; - case GLSL_TYPE_UINT: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.u[i]; - } - break; - case GLSL_TYPE_INT: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.i[i]; - } - break; - case GLSL_TYPE_BOOL: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.b[i]; - } - break; - default: - assert(!"Non-float/uint/int/bool constant"); - } - - this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &this->result.swizzle); -} - -void -ir_to_mesa_visitor::visit(ir_call *) -{ - assert(!"ir_to_mesa: All function calls should have been inlined by now."); -} - -void -ir_to_mesa_visitor::visit(ir_texture *ir) -{ - src_reg result_src, coord, lod_info, projector, dx, dy; - dst_reg result_dst, coord_dst; - ir_to_mesa_instruction *inst = NULL; - prog_opcode opcode = OPCODE_NOP; - - if (ir->op == ir_txs) - this->result = src_reg_for_float(0.0); - else - ir->coordinate->accept(this); - - /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as-is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. - */ - coord = get_temp(glsl_type::vec4_type); - coord_dst = dst_reg(coord); - emit(ir, OPCODE_MOV, coord_dst, this->result); - - if (ir->projector) { - ir->projector->accept(this); - projector = this->result; - } - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(glsl_type::vec4_type); - result_dst = dst_reg(result_src); - - switch (ir->op) { - case ir_tex: - case ir_txs: - opcode = OPCODE_TEX; - break; - case ir_txb: - opcode = OPCODE_TXB; - ir->lod_info.bias->accept(this); - lod_info = this->result; - break; - case ir_txf: - /* Pretend to be TXL so the sampler, coordinate, lod are available */ - case ir_txl: - opcode = OPCODE_TXL; - ir->lod_info.lod->accept(this); - lod_info = this->result; - break; - case ir_txd: - opcode = OPCODE_TXD; - ir->lod_info.grad.dPdx->accept(this); - dx = this->result; - ir->lod_info.grad.dPdy->accept(this); - dy = this->result; - break; - case ir_txf_ms: - assert(!"Unexpected ir_txf_ms opcode"); - break; - case ir_lod: - assert(!"Unexpected ir_lod opcode"); - break; - case ir_tg4: - assert(!"Unexpected ir_tg4 opcode"); - break; - case ir_query_levels: - assert(!"Unexpected ir_query_levels opcode"); - break; - case ir_samples_identical: - unreachable("Unexpected ir_samples_identical opcode"); - case ir_texture_samples: - unreachable("Unexpected ir_texture_samples opcode"); - } - - const glsl_type *sampler_type = ir->sampler->type; - - if (ir->projector) { - if (opcode == OPCODE_TEX) { - /* Slot the projector in as the last component of the coord. */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, projector); - coord_dst.writemask = WRITEMASK_XYZW; - opcode = OPCODE_TXP; - } else { - src_reg coord_w = coord; - coord_w.swizzle = SWIZZLE_WWWW; - - /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by lod info. Do the - * projective divide now. - */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_RCP, coord_dst, projector); - - /* In the case where we have to project the coordinates "by hand," - * the shadow comparator value must also be projected. - */ - src_reg tmp_src = coord; - if (ir->shadow_comparator) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - tmp_src = get_temp(glsl_type::vec4_type); - dst_reg tmp_dst = dst_reg(tmp_src); - - /* Projective division not allowed for array samplers. */ - assert(!sampler_type->sampler_array); - - tmp_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, tmp_dst, this->result); - - tmp_dst.writemask = WRITEMASK_XY; - emit(ir, OPCODE_MOV, tmp_dst, coord); - } - - coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); - - coord_dst.writemask = WRITEMASK_XYZW; - coord.swizzle = SWIZZLE_XYZW; - } - } - - /* If projection is done and the opcode is not OPCODE_TXP, then the shadow - * comparator was put in the correct place (and projected) by the code, - * above, that handles by-hand projection. - */ - if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - /* XXX This will need to be updated for cubemap array samplers. */ - if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && - sampler_type->sampler_array) { - coord_dst.writemask = WRITEMASK_W; - } else { - coord_dst.writemask = WRITEMASK_Z; - } - - emit(ir, OPCODE_MOV, coord_dst, this->result); - coord_dst.writemask = WRITEMASK_XYZW; - } - - if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { - /* Mesa IR stores lod or lod bias in the last channel of the coords. */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, lod_info); - coord_dst.writemask = WRITEMASK_XYZW; - } - - if (opcode == OPCODE_TXD) - inst = emit(ir, opcode, result_dst, coord, dx, dy); - else - inst = emit(ir, opcode, result_dst, coord); - - if (ir->shadow_comparator) - inst->tex_shadow = GL_TRUE; - - inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program, - prog); - - switch (sampler_type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - assert(!"FINISHME: Implement ARB_texture_buffer_object"); - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - default: - assert(!"Should not get here."); - } - - this->result = result_src; -} - -void -ir_to_mesa_visitor::visit(ir_return *ir) -{ - /* Non-void functions should have been inlined. We may still emit RETs - * from main() unless the EmitNoMainReturn option is set. - */ - assert(!ir->get_value()); - emit(ir, OPCODE_RET); -} - -void -ir_to_mesa_visitor::visit(ir_discard *ir) -{ - if (!ir->condition) - ir->condition = new(mem_ctx) ir_constant(true); - - ir->condition->accept(this); - this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); -} - -void -ir_to_mesa_visitor::visit(ir_demote *ir) -{ - assert(!"demote statement unsupported"); -} - -void -ir_to_mesa_visitor::visit(ir_if *ir) -{ - ir_to_mesa_instruction *if_inst; - - ir->condition->accept(this); - assert(this->result.file != PROGRAM_UNDEFINED); - - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); - - this->instructions.push_tail(if_inst); - - visit_exec_list(&ir->then_instructions, this); - - if (!ir->else_instructions.is_empty()) { - emit(ir->condition, OPCODE_ELSE); - visit_exec_list(&ir->else_instructions, this); - } - - emit(ir->condition, OPCODE_ENDIF); -} - -void -ir_to_mesa_visitor::visit(ir_emit_vertex *) -{ - assert(!"Geometry shaders not supported."); -} - -void -ir_to_mesa_visitor::visit(ir_end_primitive *) -{ - assert(!"Geometry shaders not supported."); -} - -void -ir_to_mesa_visitor::visit(ir_barrier *) -{ - unreachable("GLSL barrier() not supported."); -} - -ir_to_mesa_visitor::ir_to_mesa_visitor() -{ - result.file = PROGRAM_UNDEFINED; - next_temp = 1; - next_signature_id = 1; - current_function = NULL; - mem_ctx = ralloc_context(NULL); - ctx = NULL; - prog = NULL; - shader_program = NULL; - options = NULL; -} - -ir_to_mesa_visitor::~ir_to_mesa_visitor() -{ - ralloc_free(mem_ctx); -} - -static struct prog_src_register -mesa_src_reg_from_ir_src_reg(src_reg reg) -{ - struct prog_src_register mesa_reg; - - mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS)); - mesa_reg.Index = reg.index; - mesa_reg.Swizzle = reg.swizzle; - mesa_reg.RelAddr = reg.reladdr != NULL; - mesa_reg.Negate = reg.negate; - - return mesa_reg; -} - -static void -set_branchtargets(ir_to_mesa_visitor *v, - struct prog_instruction *mesa_instructions, - int num_instructions) -{ - int if_count = 0, loop_count = 0; - int *if_stack, *loop_stack; - int if_stack_pos = 0, loop_stack_pos = 0; - int i, j; - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_count++; - break; - case OPCODE_BGNLOOP: - loop_count++; - break; - case OPCODE_BRK: - case OPCODE_CONT: - mesa_instructions[i].BranchTarget = -1; - break; - default: - break; - } - } - - if_stack = rzalloc_array(v->mem_ctx, int, if_count); - loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_stack[if_stack_pos] = i; - if_stack_pos++; - break; - case OPCODE_ELSE: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack[if_stack_pos - 1] = i; - break; - case OPCODE_ENDIF: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack_pos--; - break; - case OPCODE_BGNLOOP: - loop_stack[loop_stack_pos] = i; - loop_stack_pos++; - break; - case OPCODE_ENDLOOP: - loop_stack_pos--; - /* Rewrite any breaks/conts at this nesting level (haven't - * already had a BranchTarget assigned) to point to the end - * of the loop. - */ - for (j = loop_stack[loop_stack_pos]; j < i; j++) { - if (mesa_instructions[j].Opcode == OPCODE_BRK || - mesa_instructions[j].Opcode == OPCODE_CONT) { - if (mesa_instructions[j].BranchTarget == -1) { - mesa_instructions[j].BranchTarget = i; - } - } - } - /* The loop ends point at each other. */ - mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; - mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; - break; - case OPCODE_CAL: - foreach_in_list(function_entry, entry, &v->function_signatures) { - if (entry->sig_id == mesa_instructions[i].BranchTarget) { - mesa_instructions[i].BranchTarget = entry->inst; - break; - } - } - break; - default: - break; - } - } -} - -static void -print_program(struct prog_instruction *mesa_instructions, - ir_instruction **mesa_instruction_annotation, - int num_instructions) -{ - ir_instruction *last_ir = NULL; - int i; - int indent = 0; - - for (i = 0; i < num_instructions; i++) { - struct prog_instruction *mesa_inst = mesa_instructions + i; - ir_instruction *ir = mesa_instruction_annotation[i]; - - fprintf(stdout, "%3d: ", i); - - if (last_ir != ir && ir) { - int j; - - for (j = 0; j < indent; j++) { - fprintf(stdout, " "); - } - ir->print(); - printf("\n"); - last_ir = ir; - - fprintf(stdout, " "); /* line number spacing. */ - } - - indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, - PROG_PRINT_DEBUG, NULL); - } -} - -namespace { - -class add_uniform_to_shader : public program_resource_visitor { -public: - add_uniform_to_shader(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_program_parameter_list *params) - : ctx(ctx), shader_program(shader_program), params(params), idx(-1), - var(NULL) - { - /* empty */ - } - - void process(ir_variable *var) - { - this->idx = -1; - this->var = var; - this->program_resource_visitor::process(var, - ctx->Const.UseSTD430AsDefaultPacking); - var->data.param_index = this->idx; - } - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - const enum glsl_interface_packing packing, - bool last_field); - - struct gl_context *ctx; - struct gl_shader_program *shader_program; - struct gl_program_parameter_list *params; - int idx; - ir_variable *var; -}; - -} /* anonymous namespace */ - -void -add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, - bool /* row_major */, - const glsl_type * /* record_type */, - const enum glsl_interface_packing, - bool /* last_field */) -{ - /* opaque types don't use storage in the param list unless they are - * bindless samplers or images. - */ - if (type->contains_opaque() && !var->data.bindless) - return; - - /* Add the uniform to the param list */ - assert(_mesa_lookup_parameter_index(params, name) < 0); - int index = _mesa_lookup_parameter_index(params, name); - - unsigned num_params = type->arrays_of_arrays_size(); - num_params = MAX2(num_params, 1); - num_params *= type->without_array()->matrix_columns; - - bool is_dual_slot = type->without_array()->is_dual_slot(); - if (is_dual_slot) - num_params *= 2; - - _mesa_reserve_parameter_storage(params, num_params, num_params); - index = params->NumParameters; - - if (ctx->Const.PackedDriverUniformStorage) { - for (unsigned i = 0; i < num_params; i++) { - unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; - unsigned comps = type->without_array()->vector_elements * dmul; - if (is_dual_slot) { - if (i & 0x1) - comps -= 4; - else - comps = 4; - } - - _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, - type->gl_type, NULL, NULL, false); - } - } else { - for (unsigned i = 0; i < num_params; i++) { - _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4, - type->gl_type, NULL, NULL, true); - } - } - - /* The first part of the uniform that's processed determines the base - * location of the whole uniform (for structures). - */ - if (this->idx < 0) - this->idx = index; - - /* Each Parameter will hold the index to the backing uniform storage. - * This avoids relying on names to match parameters and uniform - * storages later when associating uniform storage. - */ - unsigned location = -1; - ASSERTED const bool found = - shader_program->UniformHash->get(location, params->Parameters[index].Name); - assert(found); - - for (unsigned i = 0; i < num_params; i++) { - struct gl_program_parameter *param = ¶ms->Parameters[index + i]; - param->UniformStorageIndex = location; - param->MainUniformStorageIndex = params->Parameters[this->idx].UniformStorageIndex; - } -} - -/** - * Generate the program parameters list for the user uniforms in a shader - * - * \param shader_program Linked shader program. This is only used to - * emit possible link errors to the info log. - * \param sh Shader whose uniforms are to be processed. - * \param params Parameter list to be filled in. - */ -void -_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, - struct gl_shader_program - *shader_program, - struct gl_linked_shader *sh, - struct gl_program_parameter_list - *params) -{ - add_uniform_to_shader add(ctx, shader_program, params); - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform) - || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) - continue; - - add.process(var); - } -} - -void -_mesa_associate_uniform_storage(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_program *prog) -{ - struct gl_program_parameter_list *params = prog->Parameters; - gl_shader_stage shader_type = prog->info.stage; - - _mesa_disallow_parameter_storage_realloc(params); - - /* After adding each uniform to the parameter list, connect the storage for - * the parameter with the tracking structure used by the API for the - * uniform. - */ - unsigned last_location = unsigned(~0); - for (unsigned i = 0; i < params->NumParameters; i++) { - if (params->Parameters[i].Type != PROGRAM_UNIFORM) - continue; - - unsigned location = params->Parameters[i].UniformStorageIndex; - - struct gl_uniform_storage *storage = - &shader_program->data->UniformStorage[location]; - - /* Do not associate any uniform storage to built-in uniforms */ - if (storage->builtin) - continue; - - if (location != last_location) { - enum gl_uniform_driver_format format = uniform_native; - unsigned columns = 0; - - int dmul; - if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm) { - dmul = storage->type->vector_elements * sizeof(float); - } else { - dmul = 4 * sizeof(float); - } - - switch (storage->type->base_type) { - case GLSL_TYPE_UINT64: - if (storage->type->vector_elements > 2) - dmul *= 2; - FALLTHROUGH; - case GLSL_TYPE_UINT: - case GLSL_TYPE_UINT16: - case GLSL_TYPE_UINT8: - assert(ctx->Const.NativeIntegers); - format = uniform_native; - columns = 1; - break; - case GLSL_TYPE_INT64: - if (storage->type->vector_elements > 2) - dmul *= 2; - FALLTHROUGH; - case GLSL_TYPE_INT: - case GLSL_TYPE_INT16: - case GLSL_TYPE_INT8: - format = - (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float; - columns = 1; - break; - case GLSL_TYPE_DOUBLE: - if (storage->type->vector_elements > 2) - dmul *= 2; - FALLTHROUGH; - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_FLOAT16: - format = uniform_native; - columns = storage->type->matrix_columns; - break; - case GLSL_TYPE_BOOL: - format = uniform_native; - columns = 1; - break; - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_SUBROUTINE: - format = uniform_native; - columns = 1; - break; - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_VOID: - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_FUNCTION: - assert(!"Should not get here."); - break; - } - - unsigned pvo = params->Parameters[i].ValueOffset; - _mesa_uniform_attach_driver_storage(storage, dmul * columns, dmul, - format, - ¶ms->ParameterValues[pvo]); - - /* When a bindless sampler/image is bound to a texture/image unit, we - * have to overwrite the constant value by the resident handle - * directly in the constant buffer before the next draw. One solution - * is to keep track a pointer to the base of the data. - */ - if (storage->is_bindless && (prog->sh.NumBindlessSamplers || - prog->sh.NumBindlessImages)) { - unsigned array_elements = MAX2(1, storage->array_elements); - - for (unsigned j = 0; j < array_elements; ++j) { - unsigned unit = storage->opaque[shader_type].index + j; - - if (storage->type->without_array()->is_sampler()) { - assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers); - prog->sh.BindlessSamplers[unit].data = - ¶ms->ParameterValues[pvo] + 4 * j; - } else if (storage->type->without_array()->is_image()) { - assert(unit >= 0 && unit < prog->sh.NumBindlessImages); - prog->sh.BindlessImages[unit].data = - ¶ms->ParameterValues[pvo] + 4 * j; - } - } - } - - /* After attaching the driver's storage to the uniform, propagate any - * data from the linker's backing store. This will cause values from - * initializers in the source code to be copied over. - */ - unsigned array_elements = MAX2(1, storage->array_elements); - if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm && - (storage->is_bindless || !storage->type->contains_opaque())) { - const int dmul = storage->type->is_64bit() ? 2 : 1; - const unsigned components = - storage->type->vector_elements * - storage->type->matrix_columns; - - for (unsigned s = 0; s < storage->num_driver_storage; s++) { - gl_constant_value *uni_storage = (gl_constant_value *) - storage->driver_storage[s].data; - memcpy(uni_storage, storage->storage, - sizeof(storage->storage[0]) * components * - array_elements * dmul); - } - } else { - _mesa_propagate_uniforms_to_driver_storage(storage, 0, - array_elements); - } - - last_location = location; - } - } -} - -void -_mesa_ensure_and_associate_uniform_storage(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_program *prog, unsigned required_space) -{ - /* Avoid reallocation of the program parameter list, because the uniform - * storage is only associated with the original parameter list. - */ - _mesa_reserve_parameter_storage(prog->Parameters, required_space, - required_space); - - /* This has to be done last. Any operation the can cause - * prog->ParameterValues to get reallocated (e.g., anything that adds a - * program constant) has to happen before creating this linkage. - */ - _mesa_associate_uniform_storage(ctx, shader_program, prog); -} - -/* - * On a basic block basis, tracks available PROGRAM_TEMPORARY register - * channels for copy propagation and updates following instructions to - * use the original versions. - * - * The ir_to_mesa_visitor lazily produces code assuming that this pass - * will occur. As an example, a TXP production before this pass: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; - * - * and after: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - * - * which allows for dead code elimination on TEMP[1]'s writes. - */ -void -ir_to_mesa_visitor::copy_propagate(void) -{ - ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx, - ir_to_mesa_instruction *, - this->next_temp * 4); - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); - int level = 0; - - foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) { - assert(inst->dst.file != PROGRAM_TEMPORARY - || inst->dst.index < this->next_temp); - - /* First, do any copy propagation possible into the src regs. */ - for (int r = 0; r < 3; r++) { - ir_to_mesa_instruction *first = NULL; - bool good = true; - int acp_base = inst->src[r].index * 4; - - if (inst->src[r].file != PROGRAM_TEMPORARY || - inst->src[r].reladdr) - continue; - - /* See if we can find entries in the ACP consisting of MOVs - * from the same src register for all the swizzled channels - * of this src register reference. - */ - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan]; - - if (!copy_chan) { - good = false; - break; - } - - assert(acp_level[acp_base + src_chan] <= level); - - if (!first) { - first = copy_chan; - } else { - if (first->src[0].file != copy_chan->src[0].file || - first->src[0].index != copy_chan->src[0].index) { - good = false; - break; - } - } - } - - if (good) { - /* We've now validated that we can copy-propagate to - * replace this src register reference. Do it. - */ - inst->src[r].file = first->src[0].file; - inst->src[r].index = first->src[0].index; - - int swizzle = 0; - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan]; - swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << - (3 * i)); - } - inst->src[r].swizzle = swizzle; - } - } - - switch (inst->op) { - case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: - /* End of a basic block, clear the ACP entirely. */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - break; - - case OPCODE_IF: - ++level; - break; - - case OPCODE_ENDIF: - case OPCODE_ELSE: - /* Clear all channels written inside the block from the ACP, but - * leaving those that were not touched. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp_level[4 * r + c] >= level) - acp[4 * r + c] = NULL; - } - } - if (inst->op == OPCODE_ENDIF) - --level; - break; - - default: - /* Continuing the block, clear any written channels from - * the ACP. - */ - if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { - /* Any temporary might be written, so no copy propagation - * across this instruction. - */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - } else if (inst->dst.file == PROGRAM_OUTPUT && - inst->dst.reladdr) { - /* Any output might be written, so no copy propagation - * from outputs across this instruction. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) - acp[4 * r + c] = NULL; - } - } - } else if (inst->dst.file == PROGRAM_TEMPORARY || - inst->dst.file == PROGRAM_OUTPUT) { - /* Clear where it's used as dst. */ - if (inst->dst.file == PROGRAM_TEMPORARY) { - for (int c = 0; c < 4; c++) { - if (inst->dst.writemask & (1 << c)) { - acp[4 * inst->dst.index + c] = NULL; - } - } - } - - /* Clear where it's used as src. */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); - - if (acp[4 * r + c]->src[0].file == inst->dst.file && - acp[4 * r + c]->src[0].index == inst->dst.index && - inst->dst.writemask & (1 << src_chan)) - { - acp[4 * r + c] = NULL; - } - } - } - } - break; - } - - /* If this is a copy, add it to the ACP. */ - if (inst->op == OPCODE_MOV && - inst->dst.file == PROGRAM_TEMPORARY && - !(inst->dst.file == inst->src[0].file && - inst->dst.index == inst->src[0].index) && - !inst->dst.reladdr && - !inst->saturate && - !inst->src[0].reladdr && - !inst->src[0].negate) { - for (int i = 0; i < 4; i++) { - if (inst->dst.writemask & (1 << i)) { - acp[4 * inst->dst.index + i] = inst; - acp_level[4 * inst->dst.index + i] = level; - } - } - } - } - - ralloc_free(acp_level); - ralloc_free(acp); -} - - -/** - * Convert a shader's GLSL IR into a Mesa gl_program. - */ -static struct gl_program * -get_mesa_program(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *shader) -{ - ir_to_mesa_visitor v; - struct prog_instruction *mesa_instructions, *mesa_inst; - ir_instruction **mesa_instruction_annotation; - int i; - struct gl_program *prog; - GLenum target = _mesa_shader_stage_to_program(shader->Stage); - const char *target_string = _mesa_shader_stage_to_string(shader->Stage); - struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; - - validate_ir_tree(shader->ir); - - prog = shader->Program; - prog->Parameters = _mesa_new_parameter_list(); - v.ctx = ctx; - v.prog = prog; - v.shader_program = shader_program; - v.options = options; - - _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, - prog->Parameters); - - /* Emit Mesa IR for main(). */ - visit_exec_list(shader->ir, &v); - v.emit(NULL, OPCODE_END); - - prog->arb.NumTemporaries = v.next_temp; - - unsigned num_instructions = v.instructions.length(); - - mesa_instructions = rzalloc_array(prog, struct prog_instruction, - num_instructions); - mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *, - num_instructions); - - v.copy_propagate(); - - /* Convert ir_mesa_instructions into prog_instructions. - */ - mesa_inst = mesa_instructions; - i = 0; - foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) { - mesa_inst->Opcode = inst->op; - if (inst->saturate) - mesa_inst->Saturate = GL_TRUE; - mesa_inst->DstReg.File = inst->dst.file; - mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.WriteMask = inst->dst.writemask; - mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; - mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); - mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]); - mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]); - mesa_inst->TexSrcUnit = inst->sampler; - mesa_inst->TexSrcTarget = inst->tex_target; - mesa_inst->TexShadow = inst->tex_shadow; - mesa_instruction_annotation[i] = inst->ir; - - /* Set IndirectRegisterFiles. */ - if (mesa_inst->DstReg.RelAddr) - prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; - - /* Update program's bitmask of indirectly accessed register files */ - for (unsigned src = 0; src < 3; src++) - if (mesa_inst->SrcReg[src].RelAddr) - prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - - switch (mesa_inst->Opcode) { - case OPCODE_IF: - if (options->MaxIfDepth == 0) { - linker_warning(shader_program, - "Couldn't flatten if-statement. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_BGNLOOP: - if (options->EmitNoLoops) { - linker_warning(shader_program, - "Couldn't unroll loop. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_CONT: - if (options->EmitNoCont) { - linker_warning(shader_program, - "Couldn't lower continue-statement. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_ARL: - prog->arb.NumAddressRegs = 1; - break; - default: - break; - } - - mesa_inst++; - i++; - - if (!shader_program->data->LinkStatus) - break; - } - - if (!shader_program->data->LinkStatus) { - goto fail_exit; - } - - set_branchtargets(&v, mesa_instructions, num_instructions); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string, - shader_program->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string, - shader_program->Name); - print_program(mesa_instructions, mesa_instruction_annotation, - num_instructions); - fflush(stderr); - } - - prog->arb.Instructions = mesa_instructions; - prog->arb.NumInstructions = num_instructions; - - /* Setting this to NULL prevents a possible double free in the fail_exit - * path (far below). - */ - mesa_instructions = NULL; - - do_set_program_inouts(shader->ir, prog, shader->Stage); - - prog->ShadowSamplers = shader->shadow_samplers; - prog->ExternalSamplersUsed = gl_external_samplers(prog); - _mesa_update_shader_textures_used(shader_program, prog); - - /* Set the gl_FragDepth layout. */ - if (target == GL_FRAGMENT_PROGRAM_ARB) { - prog->info.fs.depth_layout = shader_program->FragDepthLayout; - } - - _mesa_optimize_program(prog, prog); - - /* This has to be done last. Any operation that can cause - * prog->ParameterValues to get reallocated (e.g., anything that adds a - * program constant) has to happen before creating this linkage. - */ - _mesa_associate_uniform_storage(ctx, shader_program, prog); - if (!shader_program->data->LinkStatus) { - goto fail_exit; - } - - return prog; - -fail_exit: - ralloc_free(mesa_instructions); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; -} - -extern "C" { - -/** - * Link a shader. - * Called via ctx->Driver.LinkShader() - * This actually involves converting GLSL IR into Mesa gl_programs with - * code lowering and other optimizations. - */ -GLboolean -_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -{ - assert(prog->data->LinkStatus); - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - bool progress; - exec_list *ir = prog->_LinkedShaders[i]->ir; - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage]; - - do { - progress = false; - - /* Lowering */ - do_mat_op_to_vec(ir); - lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP - | MUL64_TO_MUL_AND_MUL_HIGH - | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); - - progress = do_common_optimization(ir, true, true, - options, ctx->Const.NativeIntegers) - || progress; - - progress = lower_quadop_vector(ir, true) || progress; - - if (options->MaxIfDepth == 0) - progress = lower_discard(ir) || progress; - - progress = lower_if_to_cond_assign((gl_shader_stage)i, ir, - options->MaxIfDepth) || progress; - - /* If there are forms of indirect addressing that the driver - * cannot handle, perform the lowering pass. - */ - if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput - || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) - progress = - lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform) - || progress; - - progress = do_vec_index_to_cond_assign(ir) || progress; - progress = lower_vector_insert(ir, true) || progress; - } while (progress); - - validate_ir_tree(ir); - } - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_program *linked_prog; - - if (prog->_LinkedShaders[i] == NULL) - continue; - - linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); - - if (linked_prog) { - _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]); - - if (!ctx->Driver.ProgramStringNotify(ctx, - _mesa_shader_stage_to_program(i), - linked_prog)) { - _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, - NULL); - return GL_FALSE; - } - } - } - - build_program_resource_list(ctx, prog, false); - return prog->data->LinkStatus; -} - -/** - * Link a GLSL shader program. Called via glLinkProgram(). - */ -void -_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -{ - unsigned int i; - bool spirv = false; - - _mesa_clear_shader_program_data(ctx, prog); - - prog->data = _mesa_create_shader_program_data(); - - prog->data->LinkStatus = LINKING_SUCCESS; - - for (i = 0; i < prog->NumShaders; i++) { - if (!prog->Shaders[i]->CompileStatus) { - linker_error(prog, "linking with uncompiled/unspecialized shader"); - } - - if (!i) { - spirv = (prog->Shaders[i]->spirv_data != NULL); - } else if (spirv && !prog->Shaders[i]->spirv_data) { - /* The GL_ARB_gl_spirv spec adds a new bullet point to the list of - * reasons LinkProgram can fail: - * - * "All the shader objects attached to <program> do not have the - * same value for the SPIR_V_BINARY_ARB state." - */ - linker_error(prog, - "not all attached shaders have the same " - "SPIR_V_BINARY_ARB state"); - } - } - prog->data->spirv = spirv; - - if (prog->data->LinkStatus) { - if (!spirv) - link_shaders(ctx, prog); - else - _mesa_spirv_link_shaders(ctx, prog); - } - - /* If LinkStatus is LINKING_SUCCESS, then reset sampler validated to true. - * Validation happens via the LinkShader call below. If LinkStatus is - * LINKING_SKIPPED, then SamplersValidated will have been restored from the - * shader cache. - */ - if (prog->data->LinkStatus == LINKING_SUCCESS) { - prog->SamplersValidated = GL_TRUE; - } - - if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) { - prog->data->LinkStatus = LINKING_FAILURE; - } - - if (prog->data->LinkStatus != LINKING_FAILURE) - _mesa_create_program_resource_hash(prog); - - /* Return early if we are loading the shader from on-disk cache */ - if (prog->data->LinkStatus == LINKING_SKIPPED) - return; - - if (ctx->_Shader->Flags & GLSL_DUMP) { - if (!prog->data->LinkStatus) { - fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name); - } - - if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) { - fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name); - fprintf(stderr, "%s\n", prog->data->InfoLog); - } - } - -#ifdef ENABLE_SHADER_CACHE - if (prog->data->LinkStatus) - shader_cache_write_program_metadata(ctx, prog); -#endif -} - -} /* extern "C" */ |