diff options
author | Timothy Arceri <tarceri@itsqueeze.com> | 2021-12-05 13:21:07 +1100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2021-12-06 10:15:08 +0000 |
commit | 4b895dc8955ccca3e2603024599dfb0589236e98 (patch) | |
tree | 23e278a6e5335e6ed5e6191df8c708df2858e181 | |
parent | d3e0cfaa086e9ff56ca38605e9410cb1a141981d (diff) |
mesa: remove GLSL IR to Mesa IR code
The last user of this was dropped with the classic drivers.
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14066>
-rw-r--r-- | src/mesa/program/ir_to_mesa.cpp | 2638 |
1 files changed, 0 insertions, 2638 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 7de6c16ce5d..49fc8861a56 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -55,2281 +55,6 @@ #include "program/prog_parameter.h" -static int swizzle_for_size(int size); - -namespace { - -class src_reg; -class dst_reg; - -/** - * This struct is a corresponding struct to Mesa prog_src_register, with - * wider fields. - */ -class src_reg { -public: - src_reg(gl_register_file file, int index, const glsl_type *type) - { - this->file = file; - this->index = index; - if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) - this->swizzle = swizzle_for_size(type->vector_elements); - else - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->reladdr = NULL; - } - - src_reg() - { - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->swizzle = 0; - this->negate = 0; - this->reladdr = NULL; - } - - explicit src_reg(dst_reg reg); - - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ - int negate; /**< NEGATE_XYZW mask from mesa */ - /** Register index should be offset by the integer in this reg. */ - src_reg *reladdr; -}; - -class dst_reg { -public: - dst_reg(gl_register_file file, int writemask) - { - this->file = file; - this->index = 0; - this->writemask = writemask; - this->reladdr = NULL; - } - - dst_reg() - { - this->file = PROGRAM_UNDEFINED; - this->index = 0; - this->writemask = 0; - this->reladdr = NULL; - } - - explicit dst_reg(src_reg reg); - - gl_register_file file; /**< PROGRAM_* from Mesa */ - int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ - int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - /** Register index should be offset by the integer in this reg. */ - src_reg *reladdr; -}; - -} /* anonymous namespace */ - -src_reg::src_reg(dst_reg reg) -{ - this->file = reg.file; - this->index = reg.index; - this->swizzle = SWIZZLE_XYZW; - this->negate = 0; - this->reladdr = reg.reladdr; -} - -dst_reg::dst_reg(src_reg reg) -{ - this->file = reg.file; - this->index = reg.index; - this->writemask = WRITEMASK_XYZW; - this->reladdr = reg.reladdr; -} - -namespace { - -class ir_to_mesa_instruction : public exec_node { -public: - DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) - - enum prog_opcode op; - dst_reg dst; - src_reg src[3]; - /** Pointer to the ir source this tree came from for debugging */ - ir_instruction *ir; - bool saturate; - int sampler; /**< sampler index */ - int tex_target; /**< One of TEXTURE_*_INDEX */ - GLboolean tex_shadow; -}; - -class variable_storage : public exec_node { -public: - variable_storage(ir_variable *var, gl_register_file file, int index) - : file(file), index(index), var(var) - { - /* empty */ - } - - gl_register_file file; - int index; - ir_variable *var; /* variable that maps to this, if any */ -}; - -class function_entry : public exec_node { -public: - ir_function_signature *sig; - - /** - * identifier of this function signature used by the program. - * - * At the point that Mesa instructions for function calls are - * generated, we don't know the address of the first instruction of - * the function body. So we make the BranchTarget that is called a - * small integer and rewrite them during set_branchtargets(). - */ - int sig_id; - - /** - * Pointer to first instruction of the function body. - * - * Set during function body emits after main() is processed. - */ - ir_to_mesa_instruction *bgn_inst; - - /** - * Index of the first instruction of the function body in actual - * Mesa IR. - * - * Set after convertion from ir_to_mesa_instruction to prog_instruction. - */ - int inst; - - /** Storage for the return value. */ - src_reg return_reg; -}; - -class ir_to_mesa_visitor : public ir_visitor { -public: - ir_to_mesa_visitor(); - ~ir_to_mesa_visitor(); - - function_entry *current_function; - - struct gl_context *ctx; - struct gl_program *prog; - struct gl_shader_program *shader_program; - struct gl_shader_compiler_options *options; - - int next_temp; - - variable_storage *find_variable_storage(const ir_variable *var); - - src_reg get_temp(const glsl_type *type); - void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); - - src_reg src_reg_for_float(float val); - - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. - */ - /*@{*/ - virtual void visit(ir_variable *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); - virtual void visit(ir_discard *); - virtual void visit(ir_demote *); - virtual void visit(ir_texture *); - virtual void visit(ir_if *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_barrier *); - /*@}*/ - - src_reg result; - - /** List of variable_storage */ - exec_list variables; - - /** List of function_entry */ - exec_list function_signatures; - int next_signature_id; - - /** List of ir_to_mesa_instruction */ - exec_list instructions; - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1); - - ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg src0, src_reg src1, src_reg src2); - - /** - * Emit the correct dot-product instruction for the type of arguments - */ - ir_to_mesa_instruction * emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); - - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0); - - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1); - - bool try_emit_mad(ir_expression *ir, - int mul_operand); - bool try_emit_mad_for_and_not(ir_expression *ir, - int mul_operand); - - void emit_swz(ir_expression *ir); - - void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, - dst_reg dst, - const src_reg &src0, const src_reg &src1); - - inline void emit_sne(ir_expression *ir, dst_reg dst, - const src_reg &src0, const src_reg &src1) - { - emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); - } - - inline void emit_seq(ir_expression *ir, dst_reg dst, - const src_reg &src0, const src_reg &src1) - { - emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); - } - - bool process_move_condition(ir_rvalue *ir); - - void copy_propagate(void); - - void *mem_ctx; -}; - -} /* anonymous namespace */ - -static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); - -static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); - -static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); - -static int -swizzle_for_size(int size) -{ - static const int size_swizzles[4] = { - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), - }; - - assert((size >= 1) && (size <= 4)); - return size_swizzles[size - 1]; -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg src0, src_reg src1, src_reg src2) -{ - ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); - int num_reladdr = 0; - - /* If we have to do relative addressing, we want to load the ARL - * reg directly for one of the regs, and preload the other reladdr - * sources into temps. - */ - num_reladdr += dst.reladdr != NULL; - num_reladdr += src0.reladdr != NULL; - num_reladdr += src1.reladdr != NULL; - num_reladdr += src2.reladdr != NULL; - - reladdr_to_temp(ir, &src2, &num_reladdr); - reladdr_to_temp(ir, &src1, &num_reladdr); - reladdr_to_temp(ir, &src0, &num_reladdr); - - if (dst.reladdr) { - emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); - num_reladdr--; - } - assert(num_reladdr == 0); - - inst->op = op; - inst->dst = dst; - inst->src[0] = src0; - inst->src[1] = src1; - inst->src[2] = src2; - inst->ir = ir; - - this->instructions.push_tail(inst); - - return inst; -} - - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1) -{ - return emit(ir, op, dst, src0, src1, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0) -{ - assert(dst.writemask != 0); - return emit(ir, op, dst, src0, undef_src, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) -{ - return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); -} - -ir_to_mesa_instruction * -ir_to_mesa_visitor::emit_dp(ir_instruction *ir, - dst_reg dst, src_reg src0, src_reg src1, - unsigned elements) -{ - static const enum prog_opcode dot_opcodes[] = { - OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 - }; - - return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); -} - -/** - * Emits Mesa scalar opcodes to produce unique answers across channels. - * - * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X - * channel determines the result across all channels. So to do a vec4 - * of this operation, we want to emit a scalar per source channel used - * to produce dest channels. - */ -void -ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - src_reg orig_src0, src_reg orig_src1) -{ - int i, j; - int done_mask = ~dst.writemask; - - /* Mesa RCP is a scalar operation splatting results to all channels, - * like ARB_fp/vp. So emit as many RCPs as necessary to cover our - * dst channels. - */ - for (i = 0; i < 4; i++) { - GLuint this_mask = (1 << i); - ir_to_mesa_instruction *inst; - src_reg src0 = orig_src0; - src_reg src1 = orig_src1; - - if (done_mask & this_mask) - continue; - - GLuint src0_swiz = GET_SWZ(src0.swizzle, i); - GLuint src1_swiz = GET_SWZ(src1.swizzle, i); - for (j = i + 1; j < 4; j++) { - /* If there is another enabled component in the destination that is - * derived from the same inputs, generate its value on this pass as - * well. - */ - if (!(done_mask & (1 << j)) && - GET_SWZ(src0.swizzle, j) == src0_swiz && - GET_SWZ(src1.swizzle, j) == src1_swiz) { - this_mask |= (1 << j); - } - } - src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, - src0_swiz, src0_swiz); - src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, - src1_swiz, src1_swiz); - - inst = emit(ir, op, dst, src0, src1); - inst->dst.writemask = this_mask; - done_mask |= this_mask; - } -} - -void -ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0) -{ - src_reg undef = undef_src; - - undef.swizzle = SWIZZLE_XXXX; - - emit_scalar(ir, op, dst, src0, undef); -} - -src_reg -ir_to_mesa_visitor::src_reg_for_float(float val) -{ - src_reg src(PROGRAM_CONSTANT, -1, NULL); - - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (const gl_constant_value *)&val, 1, &src.swizzle); - - return src; -} - -static int -type_size(const struct glsl_type *type) -{ - return type->count_vec4_slots(false, false); -} - -/** - * In the initial pass of codegen, we assign temporary numbers to - * intermediate results. (not SSA -- variable assignments will reuse - * storage). Actual register allocation for the Mesa VM occurs in a - * pass over the Mesa IR later. - */ -src_reg -ir_to_mesa_visitor::get_temp(const glsl_type *type) -{ - src_reg src; - - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; - src.reladdr = NULL; - next_temp += type_size(type); - - if (type->is_array() || type->is_struct()) { - src.swizzle = SWIZZLE_NOOP; - } else { - src.swizzle = swizzle_for_size(type->vector_elements); - } - src.negate = 0; - - return src; -} - -variable_storage * -ir_to_mesa_visitor::find_variable_storage(const ir_variable *var) -{ - foreach_in_list(variable_storage, entry, &this->variables) { - if (entry->var == var) - return entry; - } - - return NULL; -} - -void -ir_to_mesa_visitor::visit(ir_variable *ir) -{ - if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { - unsigned int i; - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - /* Check if this statevar's setup in the STATE file exactly - * matches how we'll want to reference it as a - * struct/array/whatever. If not, then we need to move it into - * temporary storage and hope that it'll get copy-propagated - * out. - */ - for (i = 0; i < ir->get_num_state_slots(); i++) { - if (slots[i].swizzle != SWIZZLE_XYZW) { - break; - } - } - - variable_storage *storage; - dst_reg dst; - if (i == ir->get_num_state_slots()) { - /* We'll set the index later. */ - storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - this->variables.push_tail(storage); - - dst = undef_dst; - } else { - /* The variable_storage constructor allocates slots based on the size - * of the type. However, this had better match the number of state - * elements that we're going to copy into the new temporary. - */ - assert((int) ir->get_num_state_slots() == type_size(ir->type)); - - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - this->next_temp += type_size(ir->type); - - dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); - } - - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - int index = _mesa_add_state_reference(this->prog->Parameters, - slots[i].tokens); - - if (storage->file == PROGRAM_STATE_VAR) { - if (storage->index == -1) { - storage->index = index; - } else { - assert(index == storage->index + (int)i); - } - } else { - src_reg src(PROGRAM_STATE_VAR, index, NULL); - src.swizzle = slots[i].swizzle; - emit(ir, OPCODE_MOV, dst, src); - /* even a float takes up a whole vec4 reg in a struct/array. */ - dst.index++; - } - } - - if (storage->file == PROGRAM_TEMPORARY && - dst.index != storage->index + (int) ir->get_num_state_slots()) { - linker_error(this->shader_program, - "failed to load builtin uniform `%s' " - "(%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); - } - } -} - -void -ir_to_mesa_visitor::visit(ir_loop *ir) -{ - emit(NULL, OPCODE_BGNLOOP); - - visit_exec_list(&ir->body_instructions, this); - - emit(NULL, OPCODE_ENDLOOP); -} - -void -ir_to_mesa_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(NULL, OPCODE_BRK); - break; - case ir_loop_jump::jump_continue: - emit(NULL, OPCODE_CONT); - break; - } -} - - -void -ir_to_mesa_visitor::visit(ir_function_signature *ir) -{ - assert(0); - (void)ir; -} - -void -ir_to_mesa_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - foreach_in_list(ir_instruction, ir, &sig->body) { - ir->accept(this); - } - } -} - -bool -ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) -{ - int nonmul_operand = 1 - mul_operand; - src_reg a, b, c; - - ir_expression *expr = ir->operands[mul_operand]->as_expression(); - if (!expr || expr->operation != ir_binop_mul) - return false; - - expr->operands[0]->accept(this); - a = this->result; - expr->operands[1]->accept(this); - b = this->result; - ir->operands[nonmul_operand]->accept(this); - c = this->result; - - this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); - - return true; -} - -/** - * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) - * - * The logic values are 1.0 for true and 0.0 for false. Logical-and is - * implemented using multiplication, and logical-or is implemented using - * addition. Logical-not can be implemented as (true - x), or (1.0 - x). - * As result, the logical expression (a & !b) can be rewritten as: - * - * - a * !b - * - a * (1 - b) - * - (a * 1) - (a * b) - * - a + -(a * b) - * - a + (a * -b) - * - * This final expression can be implemented as a single MAD(a, -b, a) - * instruction. - */ -bool -ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) -{ - const int other_operand = 1 - try_operand; - src_reg a, b; - - ir_expression *expr = ir->operands[try_operand]->as_expression(); - if (!expr || expr->operation != ir_unop_logic_not) - return false; - - ir->operands[other_operand]->accept(this); - a = this->result; - expr->operands[0]->accept(this); - b = this->result; - - b.negate = ~b.negate; - - this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); - - return true; -} - -void -ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, - src_reg *reg, int *num_reladdr) -{ - if (!reg->reladdr) - return; - - emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); - - if (*num_reladdr != 1) { - src_reg temp = get_temp(glsl_type::vec4_type); - - emit(ir, OPCODE_MOV, dst_reg(temp), *reg); - *reg = temp; - } - - (*num_reladdr)--; -} - -void -ir_to_mesa_visitor::emit_swz(ir_expression *ir) -{ - /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. - * This means that each of the operands is either an immediate value of -1, - * 0, or 1, or is a component from one source register (possibly with - * negation). - */ - uint8_t components[4] = { 0 }; - bool negate[4] = { false }; - ir_variable *var = NULL; - - for (unsigned i = 0; i < ir->type->vector_elements; i++) { - ir_rvalue *op = ir->operands[i]; - - assert(op->type->is_scalar()); - - while (op != NULL) { - switch (op->ir_type) { - case ir_type_constant: { - - assert(op->type->is_scalar()); - - const ir_constant *const c = op->as_constant(); - if (c->is_one()) { - components[i] = SWIZZLE_ONE; - } else if (c->is_zero()) { - components[i] = SWIZZLE_ZERO; - } else if (c->is_negative_one()) { - components[i] = SWIZZLE_ONE; - negate[i] = true; - } else { - assert(!"SWZ constant must be 0.0 or 1.0."); - } - - op = NULL; - break; - } - - case ir_type_dereference_variable: { - ir_dereference_variable *const deref = - (ir_dereference_variable *) op; - - assert((var == NULL) || (deref->var == var)); - components[i] = SWIZZLE_X; - var = deref->var; - op = NULL; - break; - } - - case ir_type_expression: { - ir_expression *const expr = (ir_expression *) op; - - assert(expr->operation == ir_unop_neg); - negate[i] = true; - - op = expr->operands[0]; - break; - } - - case ir_type_swizzle: { - ir_swizzle *const swiz = (ir_swizzle *) op; - - components[i] = swiz->mask.x; - op = swiz->val; - break; - } - - default: - assert(!"Should not get here."); - return; - } - } - } - - assert(var != NULL); - - ir_dereference_variable *const deref = - new(mem_ctx) ir_dereference_variable(var); - - this->result.file = PROGRAM_UNDEFINED; - deref->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - printf("Failed to get tree for expression operand:\n"); - deref->print(); - printf("\n"); - exit(1); - } - - src_reg src; - - src = this->result; - src.swizzle = MAKE_SWIZZLE4(components[0], - components[1], - components[2], - components[3]); - src.negate = ((unsigned(negate[0]) << 0) - | (unsigned(negate[1]) << 1) - | (unsigned(negate[2]) << 2) - | (unsigned(negate[3]) << 3)); - - /* Storage for our result. Ideally for an assignment we'd be using the - * actual storage for the result here, instead. - */ - const src_reg result_src = get_temp(ir->type); - dst_reg result_dst = dst_reg(result_src); - - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - emit(ir, OPCODE_SWZ, result_dst, src); - this->result = result_src; -} - -void -ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, - enum prog_opcode op, - dst_reg dst, - const src_reg &src0, - const src_reg &src1) -{ - src_reg difference; - src_reg abs_difference = get_temp(glsl_type::vec4_type); - const src_reg zero = src_reg_for_float(0.0); - - /* x == y is equivalent to -abs(x-y) >= 0. Since all of the code that - * consumes the generated IR is pretty dumb, take special care when one - * of the operands is zero. - * - * Similarly, x != y is equivalent to -abs(x-y) < 0. - */ - if (src0.file == zero.file && - src0.index == zero.index && - src0.swizzle == zero.swizzle) { - difference = src1; - } else if (src1.file == zero.file && - src1.index == zero.index && - src1.swizzle == zero.swizzle) { - difference = src0; - } else { - difference = get_temp(glsl_type::vec4_type); - - src_reg tmp_src = src0; - tmp_src.negate = ~tmp_src.negate; - - emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); - } - - emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); - - abs_difference.negate = ~abs_difference.negate; - emit(ir, op, dst, abs_difference, zero); -} - -void -ir_to_mesa_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - src_reg op[ARRAY_SIZE(ir->operands)]; - src_reg result_src; - dst_reg result_dst; - - /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) - */ - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir, 1)) - return; - if (try_emit_mad(ir, 0)) - return; - } - - /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) - */ - if (ir->operation == ir_binop_logic_and) { - if (try_emit_mad_for_and_not(ir, 1)) - return; - if (try_emit_mad_for_and_not(ir, 0)) - return; - } - - if (ir->operation == ir_quadop_vector) { - this->emit_swz(ir); - return; - } - - for (operand = 0; operand < ir->num_operands; operand++) { - this->result.file = PROGRAM_UNDEFINED; - ir->operands[operand]->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - printf("Failed to get tree for expression operand:\n"); - ir->operands[operand]->print(); - printf("\n"); - exit(1); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - } - - int vector_elements = ir->operands[0]->type->vector_elements; - if (ir->operands[1]) { - vector_elements = MAX2(vector_elements, - ir->operands[1]->type->vector_elements); - } - - this->result.file = PROGRAM_UNDEFINED; - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(ir->type); - /* convenience for the emit functions below. */ - result_dst = dst_reg(result_src); - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - switch (ir->operation) { - case ir_unop_logic_not: - /* Previously 'SEQ dst, src, 0.0' was used for this. However, many - * older GPUs implement SEQ using multiple instructions (i915 uses two - * SGE instructions and a MUL instruction). Since our logic values are - * 0.0 and 1.0, 1-x also implements !x. - */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); - break; - case ir_unop_neg: - op[0].negate = ~op[0].negate; - result_src = op[0]; - break; - case ir_unop_abs: - emit(ir, OPCODE_ABS, result_dst, op[0]); - break; - case ir_unop_sign: - emit(ir, OPCODE_SSG, result_dst, op[0]); - break; - case ir_unop_rcp: - emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); - break; - - case ir_unop_exp2: - emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); - break; - case ir_unop_exp: - assert(!"not reached: should be handled by exp_to_exp2"); - break; - case ir_unop_log: - assert(!"not reached: should be handled by log_to_log2"); - break; - case ir_unop_log2: - emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); - break; - case ir_unop_sin: - emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos: - emit_scalar(ir, OPCODE_COS, result_dst, op[0]); - break; - - case ir_unop_dFdx: - emit(ir, OPCODE_DDX, result_dst, op[0]); - break; - case ir_unop_dFdy: - emit(ir, OPCODE_DDY, result_dst, op[0]); - break; - - case ir_unop_saturate: { - ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, - result_dst, op[0]); - inst->saturate = true; - break; - } - - case ir_binop_add: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - break; - case ir_binop_sub: - emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); - break; - - case ir_binop_mul: - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); - break; - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ - assert(ir->type->is_integer_32()); - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - - case ir_binop_less: - emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); - break; - case ir_binop_gequal: - emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); - break; - case ir_binop_equal: - emit_seq(ir, result_dst, op[0], op[1]); - break; - case ir_binop_nequal: - emit_sne(ir, result_dst, op[0], op[1]); - break; - case ir_binop_all_equal: - /* "==" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - src_reg temp = get_temp(glsl_type::vec4_type); - emit_sne(ir, dst_reg(temp), op[0], op[1]); - - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero becomes 1.0, and positive values become zero. - */ - emit_dp(ir, result_dst, temp, temp, vector_elements); - - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero becomes 1.0, and negative values become zero. This - * achieved using SGE. - */ - src_reg sge_src = result_src; - sge_src.negate = ~sge_src.negate; - emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); - } else { - emit_seq(ir, result_dst, op[0], op[1]); - } - break; - case ir_binop_any_nequal: - /* "!=" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - src_reg temp = get_temp(glsl_type::vec4_type); - if (ir->operands[0]->type->is_boolean() && - ir->operands[1]->as_constant() && - ir->operands[1]->as_constant()->is_zero()) { - temp = op[0]; - } else { - emit_sne(ir, dst_reg(temp), op[0], op[1]); - } - - /* After the dot-product, the value will be an integer on the - * range [0,4]. Zero stays zero, and positive values become 1.0. - */ - ir_to_mesa_instruction *const dp = - emit_dp(ir, result_dst, temp, temp, vector_elements); - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* The clamping to [0,1] can be done for free in the fragment - * shader with a saturate. - */ - dp->saturate = true; - } else { - /* Negating the result of the dot-product gives values on the range - * [-4, 0]. Zero stays zero, and negative values become 1.0. This - * achieved using SLT. - */ - src_reg slt_src = result_src; - slt_src.negate = ~slt_src.negate; - emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); - } - } else { - emit_sne(ir, result_dst, op[0], op[1]); - } - break; - - case ir_binop_logic_xor: - emit_sne(ir, result_dst, op[0], op[1]); - break; - - case ir_binop_logic_or: { - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - /* After the addition, the value will be an integer on the - * range [0,2]. Zero stays zero, and positive values become 1.0. - */ - ir_to_mesa_instruction *add = - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - add->saturate = true; - } else { - /* The Boolean arguments are stored as float 0.0 and 1.0. If either - * value is 1.0, the result of the logcal-or should be 1.0. If both - * values are 0.0, the result should be 0.0. This is exactly what - * MAX does. - */ - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); - } - break; - } - - case ir_binop_logic_and: - /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); - break; - - case ir_binop_dot: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - emit_dp(ir, result_dst, op[0], op[1], - ir->operands[0]->type->vector_elements); - break; - - case ir_unop_sqrt: - /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); - /* For incoming channels <= 0, set the result to 0. */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, - op[0], result_src, src_reg_for_float(0.0)); - break; - case ir_unop_rsq: - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - break; - case ir_unop_i2f: - case ir_unop_u2f: - case ir_unop_b2f: - case ir_unop_b2i: - case ir_unop_i2u: - case ir_unop_u2i: - /* Mesa IR lacks types, ints are stored as truncated floats. */ - result_src = op[0]; - break; - case ir_unop_f2i: - case ir_unop_f2u: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_f2b: - case ir_unop_i2b: - emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); - break; - case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway - case ir_unop_bitcast_f2u: - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - break; - case ir_unop_trunc: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); - break; - case ir_unop_ceil: - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_FLR, result_dst, op[0]); - result_src.negate = ~result_src.negate; - break; - case ir_unop_floor: - emit(ir, OPCODE_FLR, result_dst, op[0]); - break; - case ir_unop_fract: - emit(ir, OPCODE_FRC, result_dst, op[0]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_pack_half_2x16: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_unop_unpack_double_2x32: - case ir_unop_bitfield_reverse: - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - assert(!"not supported"); - break; - case ir_binop_min: - emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); - break; - case ir_binop_max: - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); - break; - case ir_binop_pow: - emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); - break; - - /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since - * hardware backends have no way to avoid Mesa IR generation - * even if they don't use it, we need to emit "something" and - * continue. - */ - case ir_binop_lshift: - case ir_binop_rshift: - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - break; - - case ir_unop_bit_not: - case ir_unop_round_even: - emit(ir, OPCODE_MOV, result_dst, op[0]); - break; - - case ir_binop_ubo_load: - assert(!"not supported"); - break; - - case ir_triop_lrp: - /* ir_triop_lrp operands are (x, y, a) while - * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. - */ - emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); - break; - - case ir_triop_csel: - /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP - * selects src1 if src0 is < 0, src2 otherwise. - */ - op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); - break; - - case ir_binop_vector_extract: - case ir_triop_fma: - case ir_triop_bitfield_extract: - case ir_triop_vector_insert: - case ir_quadop_bitfield_insert: - case ir_binop_ldexp: - case ir_binop_carry: - case ir_binop_borrow: - case ir_binop_abs_sub: - case ir_binop_add_sat: - case ir_binop_sub_sat: - case ir_binop_avg: - case ir_binop_avg_round: - case ir_binop_mul_32x16: - case ir_binop_imul_high: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - case ir_unop_subroutine_to_int: - case ir_unop_get_buffer_size: - case ir_unop_bitcast_u642d: - case ir_unop_bitcast_i642d: - case ir_unop_bitcast_d2u64: - case ir_unop_bitcast_d2i64: - case ir_unop_i642i: - case ir_unop_u642i: - case ir_unop_i642u: - case ir_unop_u642u: - case ir_unop_i642b: - case ir_unop_i642f: - case ir_unop_u642f: - case ir_unop_i642d: - case ir_unop_u642d: - case ir_unop_i2i64: - case ir_unop_u2i64: - case ir_unop_b2i64: - case ir_unop_f2i64: - case ir_unop_d2i64: - case ir_unop_i2u64: - case ir_unop_u2u64: - case ir_unop_f2u64: - case ir_unop_d2u64: - case ir_unop_u642i64: - case ir_unop_i642u64: - case ir_unop_pack_int_2x32: - case ir_unop_unpack_int_2x32: - case ir_unop_pack_uint_2x32: - case ir_unop_unpack_uint_2x32: - case ir_unop_pack_sampler_2x32: - case ir_unop_unpack_sampler_2x32: - case ir_unop_pack_image_2x32: - case ir_unop_unpack_image_2x32: - case ir_unop_atan: - case ir_binop_atan2: - case ir_unop_clz: - case ir_unop_f162f: - case ir_unop_f2f16: - case ir_unop_f2fmp: - case ir_unop_f162b: - case ir_unop_b2f16: - case ir_unop_i2i: - case ir_unop_i2imp: - case ir_unop_u2u: - case ir_unop_u2ump: - assert(!"not supported"); - break; - - case ir_unop_ssbo_unsized_array_length: - case ir_unop_implicitly_sized_array_length: - case ir_quadop_vector: - /* This operation should have already been handled. - */ - assert(!"Should not get here."); - break; - } - - this->result = result_src; -} - - -void -ir_to_mesa_visitor::visit(ir_swizzle *ir) -{ - src_reg src; - int i; - int swizzle[4] = {0}; - - /* Note that this is only swizzles in expressions, not those on the left - * hand side of an assignment, which do write masking. See ir_assignment - * for that. - */ - - ir->val->accept(this); - src = this->result; - assert(src.file != PROGRAM_UNDEFINED); - assert(ir->type->vector_elements > 0); - - for (i = 0; i < 4; i++) { - if (i < ir->type->vector_elements) { - switch (i) { - case 0: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); - break; - case 1: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); - break; - case 2: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); - break; - case 3: - swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); - break; - } - } else { - /* If the type is smaller than a vec4, replicate the last - * channel out. - */ - swizzle[i] = swizzle[ir->type->vector_elements - 1]; - } - } - - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - - this->result = src; -} - -void -ir_to_mesa_visitor::visit(ir_dereference_variable *ir) -{ - variable_storage *entry = find_variable_storage(ir->var); - ir_variable *var = ir->var; - - if (!entry) { - switch (var->data.mode) { - case ir_var_uniform: - entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, - var->data.param_index); - this->variables.push_tail(entry); - break; - case ir_var_shader_in: - /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), - * user-assigned generic attributes (glBindVertexLocation), - * and user-defined varyings. - */ - assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_INPUT, - var->data.location); - break; - case ir_var_shader_out: - assert(var->data.location != -1); - entry = new(mem_ctx) variable_storage(var, - PROGRAM_OUTPUT, - var->data.location); - break; - case ir_var_system_value: - entry = new(mem_ctx) variable_storage(var, - PROGRAM_SYSTEM_VALUE, - var->data.location); - break; - case ir_var_auto: - case ir_var_temporary: - entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(entry); - - next_temp += type_size(var->type); - break; - } - - if (!entry) { - printf("Failed to make storage for %s\n", var->name); - exit(1); - } - } - - this->result = src_reg(entry->file, entry->index, var->type); -} - -void -ir_to_mesa_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *index; - src_reg src; - int element_size = type_size(ir->type); - - index = ir->array_index->constant_expression_value(ralloc_parent(ir)); - - ir->array->accept(this); - src = this->result; - - if (index) { - src.index += index->value.i[0] * element_size; - } else { - /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register - * index. - */ - ir->array_index->accept(this); - - src_reg index_reg; - - if (element_size == 1) { - index_reg = this->result; - } else { - index_reg = get_temp(glsl_type::float_type); - - emit(ir, OPCODE_MUL, dst_reg(index_reg), - this->result, src_reg_for_float(element_size)); - } - - /* If there was already a relative address register involved, add the - * new and the old together to get the new offset. - */ - if (src.reladdr != NULL) { - src_reg accum_reg = get_temp(glsl_type::float_type); - - emit(ir, OPCODE_ADD, dst_reg(accum_reg), - index_reg, *src.reladdr); - - index_reg = accum_reg; - } - - src.reladdr = ralloc(mem_ctx, src_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - src.swizzle = swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = SWIZZLE_NOOP; - - this->result = src; -} - -void -ir_to_mesa_visitor::visit(ir_dereference_record *ir) -{ - unsigned int i; - const glsl_type *struct_type = ir->record->type; - int offset = 0; - - ir->record->accept(this); - - assert(ir->field_idx >= 0); - for (i = 0; i < struct_type->length; i++) { - if (i == (unsigned) ir->field_idx) - break; - offset += type_size(struct_type->fields.structure[i].type); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector()) - this->result.swizzle = swizzle_for_size(ir->type->vector_elements); - else - this->result.swizzle = SWIZZLE_NOOP; - - this->result.index += offset; -} - -/** - * We want to be careful in assignment setup to hit the actual storage - * instead of potentially using a temporary like we might with the - * ir_dereference handler. - */ -static dst_reg -get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) -{ - /* The LHS must be a dereference. If the LHS is a variable indexed array - * access of a vector, it must be separated into a series conditional moves - * before reaching this point (see ir_vec_index_to_cond_assign). - */ - assert(ir->as_dereference()); - ir_dereference_array *deref_array = ir->as_dereference_array(); - if (deref_array) { - assert(!deref_array->array->type->is_vector()); - } - - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. - */ - ir->accept(v); - return dst_reg(v->result); -} - -/* Calculate the sampler index and also calculate the base uniform location - * for struct members. - */ -static void -calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, - unsigned *offset, unsigned *array_elements, - unsigned *location) -{ - if (deref->ir_type == ir_type_dereference_variable) - return; - - switch (deref->ir_type) { - case ir_type_dereference_array: { - ir_dereference_array *deref_arr = deref->as_dereference_array(); - - void *mem_ctx = ralloc_parent(deref_arr); - ir_constant *array_index = - deref_arr->array_index->constant_expression_value(mem_ctx); - - if (!array_index) { - /* GLSL 1.10 and 1.20 allowed variable sampler array indices, - * while GLSL 1.30 requires that the array indices be - * constant integer expressions. We don't expect any driver - * to actually work with a really variable array index, so - * all that would work would be an unrolled loop counter that ends - * up being constant above. - */ - ralloc_strcat(&prog->data->InfoLog, - "warning: Variable sampler array index unsupported.\n" - "This feature of the language was removed in GLSL 1.20 " - "and is unlikely to be supported for 1.10 in Mesa.\n"); - } else { - *offset += array_index->value.u[0] * *array_elements; - } - - *array_elements *= deref_arr->array->type->length; - - calc_sampler_offsets(prog, deref_arr->array->as_dereference(), - offset, array_elements, location); - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *deref_record = deref->as_dereference_record(); - unsigned field_index = deref_record->field_idx; - *location += - deref_record->record->type->struct_location_offset(field_index); - calc_sampler_offsets(prog, deref_record->record->as_dereference(), - offset, array_elements, location); - break; - } - - default: - unreachable("Invalid deref type"); - break; - } -} - -static int -get_sampler_uniform_value(class ir_dereference *sampler, - struct gl_shader_program *shader_program, - const struct gl_program *prog) -{ - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - ir_variable *var = sampler->variable_referenced(); - unsigned location = var->data.location; - unsigned array_elements = 1; - unsigned offset = 0; - - calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, - &location); - - assert(shader_program->data->UniformStorage[location].opaque[shader].active); - return shader_program->data->UniformStorage[location].opaque[shader].index + - offset; -} - -/** - * Process the condition of a conditional assignment - * - * Examines the condition of a conditional assignment to generate the optimal - * first operand of a \c CMP instruction. If the condition is a relational - * operator with 0 (e.g., \c ir_binop_less), the value being compared will be - * used as the source for the \c CMP instruction. Otherwise the comparison - * is processed to a boolean result, and the boolean result is used as the - * operand to the CMP instruction. - */ -bool -ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) -{ - ir_rvalue *src_ir = ir; - bool negate = true; - bool switch_order = false; - - ir_expression *const expr = ir->as_expression(); - if ((expr != NULL) && (expr->num_operands == 2)) { - bool zero_on_left = false; - - if (expr->operands[0]->is_zero()) { - src_ir = expr->operands[1]; - zero_on_left = true; - } else if (expr->operands[1]->is_zero()) { - src_ir = expr->operands[0]; - zero_on_left = false; - } - - /* a is - 0 + - 0 + - * (a < 0) T F F ( a < 0) T F F - * (0 < a) F F T (-a < 0) F F T - * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) - * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) - * - * Note that exchanging the order of 0 and 'a' in the comparison simply - * means that the value of 'a' should be negated. - */ - if (src_ir != ir) { - switch (expr->operation) { - case ir_binop_less: - switch_order = false; - negate = zero_on_left; - break; - - case ir_binop_gequal: - switch_order = true; - negate = zero_on_left; - break; - - default: - /* This isn't the right kind of comparison afterall, so make sure - * the whole condition is visited. - */ - src_ir = ir; - break; - } - } - } - - src_ir->accept(this); - - /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the - * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value OPCODE_CMP produces without an extra instruction - * computing the condition. - */ - if (negate) - this->result.negate = ~this->result.negate; - - return switch_order; -} - -void -ir_to_mesa_visitor::visit(ir_assignment *ir) -{ - dst_reg l; - src_reg r; - int i; - - ir->rhs->accept(this); - r = this->result; - - l = get_assignment_lhs(ir->lhs, this); - - /* FINISHME: This should really set to the correct maximal writemask for each - * FINISHME: component written (in the loops below). This case can only - * FINISHME: occur for matrices, arrays, and structures. - */ - if (ir->write_mask == 0) { - assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); - l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { - /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the - * FINISHME: W component of fragment shader output zero, work correctly. - */ - l.writemask = WRITEMASK_XYZW; - } else { - int swizzles[4]; - int first_enabled_chan = 0; - int rhs_chan = 0; - - assert(ir->lhs->type->is_vector()); - l.writemask = ir->write_mask; - - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) { - first_enabled_chan = GET_SWZ(r.swizzle, i); - break; - } - } - - /* Swizzle a small RHS vector into the channels being written. - * - * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just - * showing which channels of the vec4 RHS get written. - */ - for (int i = 0; i < 4; i++) { - if (l.writemask & (1 << i)) - swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); - else - swizzles[i] = first_enabled_chan; - } - r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], - swizzles[2], swizzles[3]); - } - - assert(l.file != PROGRAM_UNDEFINED); - assert(r.file != PROGRAM_UNDEFINED); - - if (ir->condition) { - const bool switch_order = this->process_move_condition(ir->condition); - src_reg condition = this->result; - - for (i = 0; i < type_size(ir->lhs->type); i++) { - if (switch_order) { - emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); - } else { - emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); - } - - l.index++; - r.index++; - } - } else { - for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, OPCODE_MOV, l, r); - l.index++; - r.index++; - } - } -} - - -void -ir_to_mesa_visitor::visit(ir_constant *ir) -{ - src_reg src; - GLfloat stack_vals[4] = { 0 }; - GLfloat *values = stack_vals; - unsigned int i; - - /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an - * aggregate constant and move each constant value into it. If we - * get lucky, copy propagation will eliminate the extra moves. - */ - - if (ir->type->is_struct()) { - src_reg temp_base = get_temp(ir->type); - dst_reg temp = dst_reg(temp_base); - - for (i = 0; i < ir->type->length; i++) { - ir_constant *const field_value = ir->get_record_field(i); - int size = type_size(field_value->type); - - assert(size > 0); - - field_value->accept(this); - src = this->result; - - for (unsigned j = 0; j < (unsigned int)size; j++) { - emit(ir, OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - return; - } - - if (ir->type->is_array()) { - src_reg temp_base = get_temp(ir->type); - dst_reg temp = dst_reg(temp_base); - int size = type_size(ir->type->fields.array); - - assert(size > 0); - - for (i = 0; i < ir->type->length; i++) { - ir->const_elements[i]->accept(this); - src = this->result; - for (int j = 0; j < size; j++) { - emit(ir, OPCODE_MOV, temp, src); - - src.index++; - temp.index++; - } - } - this->result = temp_base; - return; - } - - if (ir->type->is_matrix()) { - src_reg mat = get_temp(ir->type); - dst_reg mat_column = dst_reg(mat); - - for (i = 0; i < ir->type->matrix_columns; i++) { - assert(ir->type->is_float()); - values = &ir->value.f[i * ir->type->vector_elements]; - - src = src_reg(PROGRAM_CONSTANT, -1, NULL); - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &src.swizzle); - emit(ir, OPCODE_MOV, mat_column, src); - - mat_column.index++; - } - - this->result = mat; - return; - } - - src.file = PROGRAM_CONSTANT; - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - values = &ir->value.f[0]; - break; - case GLSL_TYPE_UINT: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.u[i]; - } - break; - case GLSL_TYPE_INT: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.i[i]; - } - break; - case GLSL_TYPE_BOOL: - for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.b[i]; - } - break; - default: - assert(!"Non-float/uint/int/bool constant"); - } - - this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &this->result.swizzle); -} - -void -ir_to_mesa_visitor::visit(ir_call *) -{ - assert(!"ir_to_mesa: All function calls should have been inlined by now."); -} - -void -ir_to_mesa_visitor::visit(ir_texture *ir) -{ - src_reg result_src, coord, lod_info, projector, dx, dy; - dst_reg result_dst, coord_dst; - ir_to_mesa_instruction *inst = NULL; - prog_opcode opcode = OPCODE_NOP; - - if (ir->op == ir_txs) - this->result = src_reg_for_float(0.0); - else - ir->coordinate->accept(this); - - /* Put our coords in a temp. We'll need to modify them for shadow, - * projection, or LOD, so the only case we'd use it as-is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. - */ - coord = get_temp(glsl_type::vec4_type); - coord_dst = dst_reg(coord); - emit(ir, OPCODE_MOV, coord_dst, this->result); - - if (ir->projector) { - ir->projector->accept(this); - projector = this->result; - } - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. - */ - result_src = get_temp(glsl_type::vec4_type); - result_dst = dst_reg(result_src); - - switch (ir->op) { - case ir_tex: - case ir_txs: - opcode = OPCODE_TEX; - break; - case ir_txb: - opcode = OPCODE_TXB; - ir->lod_info.bias->accept(this); - lod_info = this->result; - break; - case ir_txf: - /* Pretend to be TXL so the sampler, coordinate, lod are available */ - case ir_txl: - opcode = OPCODE_TXL; - ir->lod_info.lod->accept(this); - lod_info = this->result; - break; - case ir_txd: - opcode = OPCODE_TXD; - ir->lod_info.grad.dPdx->accept(this); - dx = this->result; - ir->lod_info.grad.dPdy->accept(this); - dy = this->result; - break; - case ir_txf_ms: - assert(!"Unexpected ir_txf_ms opcode"); - break; - case ir_lod: - assert(!"Unexpected ir_lod opcode"); - break; - case ir_tg4: - assert(!"Unexpected ir_tg4 opcode"); - break; - case ir_query_levels: - assert(!"Unexpected ir_query_levels opcode"); - break; - case ir_samples_identical: - unreachable("Unexpected ir_samples_identical opcode"); - case ir_texture_samples: - unreachable("Unexpected ir_texture_samples opcode"); - } - - const glsl_type *sampler_type = ir->sampler->type; - - if (ir->projector) { - if (opcode == OPCODE_TEX) { - /* Slot the projector in as the last component of the coord. */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, projector); - coord_dst.writemask = WRITEMASK_XYZW; - opcode = OPCODE_TXP; - } else { - src_reg coord_w = coord; - coord_w.swizzle = SWIZZLE_WWWW; - - /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by lod info. Do the - * projective divide now. - */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_RCP, coord_dst, projector); - - /* In the case where we have to project the coordinates "by hand," - * the shadow comparator value must also be projected. - */ - src_reg tmp_src = coord; - if (ir->shadow_comparator) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - tmp_src = get_temp(glsl_type::vec4_type); - dst_reg tmp_dst = dst_reg(tmp_src); - - /* Projective division not allowed for array samplers. */ - assert(!sampler_type->sampler_array); - - tmp_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, tmp_dst, this->result); - - tmp_dst.writemask = WRITEMASK_XY; - emit(ir, OPCODE_MOV, tmp_dst, coord); - } - - coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); - - coord_dst.writemask = WRITEMASK_XYZW; - coord.swizzle = SWIZZLE_XYZW; - } - } - - /* If projection is done and the opcode is not OPCODE_TXP, then the shadow - * comparator was put in the correct place (and projected) by the code, - * above, that handles by-hand projection. - */ - if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { - /* Slot the shadow value in as the second to last component of the - * coord. - */ - ir->shadow_comparator->accept(this); - - /* XXX This will need to be updated for cubemap array samplers. */ - if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && - sampler_type->sampler_array) { - coord_dst.writemask = WRITEMASK_W; - } else { - coord_dst.writemask = WRITEMASK_Z; - } - - emit(ir, OPCODE_MOV, coord_dst, this->result); - coord_dst.writemask = WRITEMASK_XYZW; - } - - if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { - /* Mesa IR stores lod or lod bias in the last channel of the coords. */ - coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, lod_info); - coord_dst.writemask = WRITEMASK_XYZW; - } - - if (opcode == OPCODE_TXD) - inst = emit(ir, opcode, result_dst, coord, dx, dy); - else - inst = emit(ir, opcode, result_dst, coord); - - if (ir->shadow_comparator) - inst->tex_shadow = GL_TRUE; - - inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program, - prog); - - switch (sampler_type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - break; - case GLSL_SAMPLER_DIM_2D: - inst->tex_target = (sampler_type->sampler_array) - ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - break; - case GLSL_SAMPLER_DIM_3D: - inst->tex_target = TEXTURE_3D_INDEX; - break; - case GLSL_SAMPLER_DIM_CUBE: - inst->tex_target = TEXTURE_CUBE_INDEX; - break; - case GLSL_SAMPLER_DIM_RECT: - inst->tex_target = TEXTURE_RECT_INDEX; - break; - case GLSL_SAMPLER_DIM_BUF: - assert(!"FINISHME: Implement ARB_texture_buffer_object"); - break; - case GLSL_SAMPLER_DIM_EXTERNAL: - inst->tex_target = TEXTURE_EXTERNAL_INDEX; - break; - default: - assert(!"Should not get here."); - } - - this->result = result_src; -} - -void -ir_to_mesa_visitor::visit(ir_return *ir) -{ - /* Non-void functions should have been inlined. We may still emit RETs - * from main() unless the EmitNoMainReturn option is set. - */ - assert(!ir->get_value()); - emit(ir, OPCODE_RET); -} - -void -ir_to_mesa_visitor::visit(ir_discard *ir) -{ - if (!ir->condition) - ir->condition = new(mem_ctx) ir_constant(true); - - ir->condition->accept(this); - this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); -} - -void -ir_to_mesa_visitor::visit(ir_demote *ir) -{ - assert(!"demote statement unsupported"); -} - -void -ir_to_mesa_visitor::visit(ir_if *ir) -{ - ir_to_mesa_instruction *if_inst; - - ir->condition->accept(this); - assert(this->result.file != PROGRAM_UNDEFINED); - - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); - - this->instructions.push_tail(if_inst); - - visit_exec_list(&ir->then_instructions, this); - - if (!ir->else_instructions.is_empty()) { - emit(ir->condition, OPCODE_ELSE); - visit_exec_list(&ir->else_instructions, this); - } - - emit(ir->condition, OPCODE_ENDIF); -} - -void -ir_to_mesa_visitor::visit(ir_emit_vertex *) -{ - assert(!"Geometry shaders not supported."); -} - -void -ir_to_mesa_visitor::visit(ir_end_primitive *) -{ - assert(!"Geometry shaders not supported."); -} - -void -ir_to_mesa_visitor::visit(ir_barrier *) -{ - unreachable("GLSL barrier() not supported."); -} - -ir_to_mesa_visitor::ir_to_mesa_visitor() -{ - result.file = PROGRAM_UNDEFINED; - next_temp = 1; - next_signature_id = 1; - current_function = NULL; - mem_ctx = ralloc_context(NULL); - ctx = NULL; - prog = NULL; - shader_program = NULL; - options = NULL; -} - -ir_to_mesa_visitor::~ir_to_mesa_visitor() -{ - ralloc_free(mem_ctx); -} - -static struct prog_src_register -mesa_src_reg_from_ir_src_reg(src_reg reg) -{ - struct prog_src_register mesa_reg; - - mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS)); - mesa_reg.Index = reg.index; - mesa_reg.Swizzle = reg.swizzle; - mesa_reg.RelAddr = reg.reladdr != NULL; - mesa_reg.Negate = reg.negate; - - return mesa_reg; -} - -static void -set_branchtargets(ir_to_mesa_visitor *v, - struct prog_instruction *mesa_instructions, - int num_instructions) -{ - int if_count = 0, loop_count = 0; - int *if_stack, *loop_stack; - int if_stack_pos = 0, loop_stack_pos = 0; - int i, j; - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_count++; - break; - case OPCODE_BGNLOOP: - loop_count++; - break; - case OPCODE_BRK: - case OPCODE_CONT: - mesa_instructions[i].BranchTarget = -1; - break; - default: - break; - } - } - - if_stack = rzalloc_array(v->mem_ctx, int, if_count); - loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_stack[if_stack_pos] = i; - if_stack_pos++; - break; - case OPCODE_ELSE: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack[if_stack_pos - 1] = i; - break; - case OPCODE_ENDIF: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack_pos--; - break; - case OPCODE_BGNLOOP: - loop_stack[loop_stack_pos] = i; - loop_stack_pos++; - break; - case OPCODE_ENDLOOP: - loop_stack_pos--; - /* Rewrite any breaks/conts at this nesting level (haven't - * already had a BranchTarget assigned) to point to the end - * of the loop. - */ - for (j = loop_stack[loop_stack_pos]; j < i; j++) { - if (mesa_instructions[j].Opcode == OPCODE_BRK || - mesa_instructions[j].Opcode == OPCODE_CONT) { - if (mesa_instructions[j].BranchTarget == -1) { - mesa_instructions[j].BranchTarget = i; - } - } - } - /* The loop ends point at each other. */ - mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; - mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; - break; - case OPCODE_CAL: - foreach_in_list(function_entry, entry, &v->function_signatures) { - if (entry->sig_id == mesa_instructions[i].BranchTarget) { - mesa_instructions[i].BranchTarget = entry->inst; - break; - } - } - break; - default: - break; - } - } -} - -static void -print_program(struct prog_instruction *mesa_instructions, - ir_instruction **mesa_instruction_annotation, - int num_instructions) -{ - ir_instruction *last_ir = NULL; - int i; - int indent = 0; - - for (i = 0; i < num_instructions; i++) { - struct prog_instruction *mesa_inst = mesa_instructions + i; - ir_instruction *ir = mesa_instruction_annotation[i]; - - fprintf(stdout, "%3d: ", i); - - if (last_ir != ir && ir) { - int j; - - for (j = 0; j < indent; j++) { - fprintf(stdout, " "); - } - ir->print(); - printf("\n"); - last_ir = ir; - - fprintf(stdout, " "); /* line number spacing. */ - } - - indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, - PROG_PRINT_DEBUG, NULL); - } -} - namespace { class add_uniform_to_shader : public program_resource_visitor { @@ -2636,369 +361,6 @@ _mesa_ensure_and_associate_uniform_storage(struct gl_context *ctx, associate_uniform_storage(ctx, shader_program, prog); } -/* - * On a basic block basis, tracks available PROGRAM_TEMPORARY register - * channels for copy propagation and updates following instructions to - * use the original versions. - * - * The ir_to_mesa_visitor lazily produces code assuming that this pass - * will occur. As an example, a TXP production before this pass: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; - * - * and after: - * - * 0: MOV TEMP[1], INPUT[4].xyyy; - * 1: MOV TEMP[1].w, INPUT[4].wwww; - * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; - * - * which allows for dead code elimination on TEMP[1]'s writes. - */ -void -ir_to_mesa_visitor::copy_propagate(void) -{ - ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx, - ir_to_mesa_instruction *, - this->next_temp * 4); - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); - int level = 0; - - foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) { - assert(inst->dst.file != PROGRAM_TEMPORARY - || inst->dst.index < this->next_temp); - - /* First, do any copy propagation possible into the src regs. */ - for (int r = 0; r < 3; r++) { - ir_to_mesa_instruction *first = NULL; - bool good = true; - int acp_base = inst->src[r].index * 4; - - if (inst->src[r].file != PROGRAM_TEMPORARY || - inst->src[r].reladdr) - continue; - - /* See if we can find entries in the ACP consisting of MOVs - * from the same src register for all the swizzled channels - * of this src register reference. - */ - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan]; - - if (!copy_chan) { - good = false; - break; - } - - assert(acp_level[acp_base + src_chan] <= level); - - if (!first) { - first = copy_chan; - } else { - if (first->src[0].file != copy_chan->src[0].file || - first->src[0].index != copy_chan->src[0].index) { - good = false; - break; - } - } - } - - if (good) { - /* We've now validated that we can copy-propagate to - * replace this src register reference. Do it. - */ - inst->src[r].file = first->src[0].file; - inst->src[r].index = first->src[0].index; - - int swizzle = 0; - for (int i = 0; i < 4; i++) { - int src_chan = GET_SWZ(inst->src[r].swizzle, i); - ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan]; - swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << - (3 * i)); - } - inst->src[r].swizzle = swizzle; - } - } - - switch (inst->op) { - case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: - /* End of a basic block, clear the ACP entirely. */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - break; - - case OPCODE_IF: - ++level; - break; - - case OPCODE_ENDIF: - case OPCODE_ELSE: - /* Clear all channels written inside the block from the ACP, but - * leaving those that were not touched. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp_level[4 * r + c] >= level) - acp[4 * r + c] = NULL; - } - } - if (inst->op == OPCODE_ENDIF) - --level; - break; - - default: - /* Continuing the block, clear any written channels from - * the ACP. - */ - if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { - /* Any temporary might be written, so no copy propagation - * across this instruction. - */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); - } else if (inst->dst.file == PROGRAM_OUTPUT && - inst->dst.reladdr) { - /* Any output might be written, so no copy propagation - * from outputs across this instruction. - */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) - acp[4 * r + c] = NULL; - } - } - } else if (inst->dst.file == PROGRAM_TEMPORARY || - inst->dst.file == PROGRAM_OUTPUT) { - /* Clear where it's used as dst. */ - if (inst->dst.file == PROGRAM_TEMPORARY) { - for (int c = 0; c < 4; c++) { - if (inst->dst.writemask & (1 << c)) { - acp[4 * inst->dst.index + c] = NULL; - } - } - } - - /* Clear where it's used as src. */ - for (int r = 0; r < this->next_temp; r++) { - for (int c = 0; c < 4; c++) { - if (!acp[4 * r + c]) - continue; - - int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); - - if (acp[4 * r + c]->src[0].file == inst->dst.file && - acp[4 * r + c]->src[0].index == inst->dst.index && - inst->dst.writemask & (1 << src_chan)) - { - acp[4 * r + c] = NULL; - } - } - } - } - break; - } - - /* If this is a copy, add it to the ACP. */ - if (inst->op == OPCODE_MOV && - inst->dst.file == PROGRAM_TEMPORARY && - !(inst->dst.file == inst->src[0].file && - inst->dst.index == inst->src[0].index) && - !inst->dst.reladdr && - !inst->saturate && - !inst->src[0].reladdr && - !inst->src[0].negate) { - for (int i = 0; i < 4; i++) { - if (inst->dst.writemask & (1 << i)) { - acp[4 * inst->dst.index + i] = inst; - acp_level[4 * inst->dst.index + i] = level; - } - } - } - } - - ralloc_free(acp_level); - ralloc_free(acp); -} - - -/** - * Convert a shader's GLSL IR into a Mesa gl_program. - */ -static struct gl_program * -get_mesa_program(struct gl_context *ctx, - struct gl_shader_program *shader_program, - struct gl_linked_shader *shader) -{ - ir_to_mesa_visitor v; - struct prog_instruction *mesa_instructions, *mesa_inst; - ir_instruction **mesa_instruction_annotation; - int i; - struct gl_program *prog; - GLenum target = _mesa_shader_stage_to_program(shader->Stage); - const char *target_string = _mesa_shader_stage_to_string(shader->Stage); - struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; - - validate_ir_tree(shader->ir); - - prog = shader->Program; - prog->Parameters = _mesa_new_parameter_list(); - v.ctx = ctx; - v.prog = prog; - v.shader_program = shader_program; - v.options = options; - - _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, - prog->Parameters); - - /* Emit Mesa IR for main(). */ - visit_exec_list(shader->ir, &v); - v.emit(NULL, OPCODE_END); - - prog->arb.NumTemporaries = v.next_temp; - - unsigned num_instructions = v.instructions.length(); - - mesa_instructions = rzalloc_array(prog, struct prog_instruction, - num_instructions); - mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *, - num_instructions); - - v.copy_propagate(); - - /* Convert ir_mesa_instructions into prog_instructions. - */ - mesa_inst = mesa_instructions; - i = 0; - foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) { - mesa_inst->Opcode = inst->op; - if (inst->saturate) - mesa_inst->Saturate = GL_TRUE; - mesa_inst->DstReg.File = inst->dst.file; - mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.WriteMask = inst->dst.writemask; - mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; - mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); - mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]); - mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]); - mesa_inst->TexSrcUnit = inst->sampler; - mesa_inst->TexSrcTarget = inst->tex_target; - mesa_inst->TexShadow = inst->tex_shadow; - mesa_instruction_annotation[i] = inst->ir; - - /* Set IndirectRegisterFiles. */ - if (mesa_inst->DstReg.RelAddr) - prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; - - /* Update program's bitmask of indirectly accessed register files */ - for (unsigned src = 0; src < 3; src++) - if (mesa_inst->SrcReg[src].RelAddr) - prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - - switch (mesa_inst->Opcode) { - case OPCODE_IF: - if (options->MaxIfDepth == 0) { - linker_warning(shader_program, - "Couldn't flatten if-statement. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_BGNLOOP: - if (options->EmitNoLoops) { - linker_warning(shader_program, - "Couldn't unroll loop. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_CONT: - if (options->EmitNoCont) { - linker_warning(shader_program, - "Couldn't lower continue-statement. " - "This will likely result in software " - "rasterization.\n"); - } - break; - case OPCODE_ARL: - prog->arb.NumAddressRegs = 1; - break; - default: - break; - } - - mesa_inst++; - i++; - - if (!shader_program->data->LinkStatus) - break; - } - - if (!shader_program->data->LinkStatus) { - goto fail_exit; - } - - set_branchtargets(&v, mesa_instructions, num_instructions); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string, - shader_program->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string, - shader_program->Name); - print_program(mesa_instructions, mesa_instruction_annotation, - num_instructions); - fflush(stderr); - } - - prog->arb.Instructions = mesa_instructions; - prog->arb.NumInstructions = num_instructions; - - /* Setting this to NULL prevents a possible double free in the fail_exit - * path (far below). - */ - mesa_instructions = NULL; - - do_set_program_inouts(shader->ir, prog, shader->Stage); - - prog->ShadowSamplers = shader->shadow_samplers; - prog->ExternalSamplersUsed = gl_external_samplers(prog); - _mesa_update_shader_textures_used(shader_program, prog); - - /* Set the gl_FragDepth layout. */ - if (target == GL_FRAGMENT_PROGRAM_ARB) { - prog->info.fs.depth_layout = shader_program->FragDepthLayout; - } - - _mesa_optimize_program(prog, prog); - - /* This has to be done last. Any operation that can cause - * prog->ParameterValues to get reallocated (e.g., anything that adds a - * program constant) has to happen before creating this linkage. - */ - associate_uniform_storage(ctx, shader_program, prog); - if (!shader_program->data->LinkStatus) { - goto fail_exit; - } - - return prog; - -fail_exit: - ralloc_free(mesa_instructions); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; -} extern "C" { |