diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_action.c')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_action.c | 1182 |
1 files changed, 1182 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_action.c b/src/gallium/auxiliary/gallivm/lp_bld_action.c new file mode 100644 index 00000000000..0b6cc77bb7c --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_action.c @@ -0,0 +1,1182 @@ +/************************************************************************** + * + * Copyright 2010-2011 Advanced Micro Devices, Inc. + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * TGSI to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + * @author Tom Stellard <thomas.stellard@amd.com> + * + * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, + * Brian Paul, and others. + */ + + +#include "lp_bld_action.h" + +#include "lp_bld_tgsi.h" +#include "lp_bld_arit.h" +#include "lp_bld_const.h" +#include "lp_bld_gather.h" +#include "lp_bld_logic.h" + +#include "tgsi/tgsi_exec.h" + +/* XXX: The CPU only defaults should be repaced by generic ones. In most + * cases, the CPU defaults are just wrappers around a function in + * lp_build_arit.c and these functions should be inlined here and the CPU + * generic code should be removed and placed elsewhere. + */ + +/* Default actions */ + +/* Generic fetch_arg functions */ + +static void scalar_unary_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); + emit_data->arg_count = 1; + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); +} + +static void scalar_binary_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src1.x */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_X); + emit_data->arg_count = 2; + emit_data->dst_type = LLVMTypeOf(emit_data->args[0]); +} + +/* TGSI_OPCODE_ADD */ +static void +add_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = LLVMBuildFAdd( + bld_base->base.gallivm->builder, + emit_data->args[0], emit_data->args[1], ""); +} + +/* TGSI_OPCODE_ARR */ +static void +arr_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_ROUND, emit_data->args[0]); +} + +/* TGSI_OPCODE_CLAMP */ +static void +clamp_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + emit_data->args[0], + emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MIN, tmp, emit_data->args[2]); +} + +/* DP* Helper */ + +static void +dp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, + unsigned dp_components) +{ + unsigned chan, src; + for (src = 0; src < 2; src++) { + for (chan = 0; chan < dp_components; chan++) { + emit_data->args[(src * dp_components) + chan] = + lp_build_emit_fetch(bld_base, emit_data->inst, src, chan); + } + } + emit_data->dst_type = bld_base->base.elem_type; +} + +/* TGSI_OPCODE_DP2 */ +static void +dp2_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 2); +} + +static void +dp2_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[2] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[3] /* src1.y */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_opcode_action dp2_action = { + .fetch_args = dp2_fetch_args, + .emit = dp2_emit +}; + +/* TGSI_OPCODE_DP2A */ +static void +dp2a_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 2); + emit_data->args[5] = lp_build_emit_fetch(bld_base, emit_data->inst, + 2, TGSI_CHAN_X); +} + +static void +dp2a_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm(bld_base, TGSI_OPCODE_DP2, emit_data); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, + emit_data->args[5], tmp); +} + +static struct lp_build_opcode_action dp2a_action = { + .fetch_args = dp2a_fetch_args, + .emit = dp2a_emit +}; + +/* TGSI_OPCODE_DP3 */ +static void +dp3_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 3); +} + +static void +dp3_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[3] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[4] /* src1.y */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp1, tmp0); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[2] /* src0.z */, + emit_data->args[5] /* src1.z */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_opcode_action dp3_action = { + .fetch_args = dp3_fetch_args, + .emit = dp3_emit +}; + +/* TGSI_OPCODDE_DP4 */ + +static void +dp4_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 4); +} + +static void +dp4_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp1; + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.x */, + emit_data->args[4] /* src1.x */); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[1] /* src0.y */, + emit_data->args[5] /* src1.y */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[2] /* src0.z */, + emit_data->args[6] /* src1.z */); + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_ADD, tmp0, tmp1); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[3] /* src0.w */, + emit_data->args[7] /* src1.w */); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp0, tmp1); +} + +static struct lp_build_opcode_action dp4_action = { + .fetch_args = dp4_fetch_args, + .emit = dp4_emit +}; + +/* TGSI_OPCODE_DPH */ +static void +dph_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 4); + /* src0.w */ + emit_data->args[3] = bld_base->base.one; +} + +const struct lp_build_opcode_action dph_action = { + .fetch_args = dph_fetch_args, + .emit = dp4_emit +}; + +/* TGSI_OPCODE_DST */ +static void +dst_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.y */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); + /* src0.z */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Z); + /* src1.y */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_Y); + /* src1.w */ + emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_W); +} + +static void +dst_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = bld_base->base.one; + + /* dst.y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MUL, + emit_data->args[0] /* src0.y */, + emit_data->args[2] /* src1.y */); + /* dst.z */ + emit_data->output[TGSI_CHAN_Z] = emit_data->args[1]; /* src0.z */ + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = emit_data->args[3]; /* src1.w */ +} + +static struct lp_build_opcode_action dst_action = { + .fetch_args = dst_fetch_args, + .emit = dst_emit +}; + +/* TGSI_OPCODE_END */ +static void +end_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + bld_base->pc = -1; +} + +/* TGSI_OPCODE_EXP */ + +static void +exp_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef floor_x; + + /* floor( src0.x ) */ + floor_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + emit_data->args[0]); + + /* 2 ^ floor( src0.x ) */ + emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_EX2, floor_x); + + /* src0.x - floor( src0.x ) */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x); + + /* 2 ^ src0.x */ + emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_EX2, emit_data->args[0] /* src0.x */); + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_opcode_action exp_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = exp_emit +}; + +/* TGSI_OPCODE_FRC */ + +static void +frc_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + emit_data->args[0]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_SUB, emit_data->args[0], tmp); +} + +/* TGSI_OPCODE_KIL */ + +static void +kil_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Y); + /* src0.z */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_Z); + /* src0.w */ + emit_data->args[3] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, TGSI_CHAN_W); + emit_data->arg_count = 4; + emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context); +} + +/* TGSI_OPCODE_KILP */ + +static void +kilp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context); +} + +/* TGSI_OPCODE_LIT */ + +static void +lit_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* src0.x */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); + /* src0.y */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y); + /* src0.w */ + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + emit_data->arg_count = 3; +} + +static void +lit_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp0, tmp2; + + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = bld_base->base.one; + + /* dst. y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_MAX, + emit_data->args[0] /* src0.x */, + bld_base->base.zero); + + /* dst.z */ + /* XMM[1] = SrcReg[0].yyyy */ + LLVMValueRef tmp1 = emit_data->args[1]; + /* XMM[1] = max(XMM[1], 0) */ + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + tmp1, bld_base->base.zero); + /* XMM[2] = SrcReg[0].wwww */ + tmp2 = emit_data->args[2]; + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_POW, + tmp1, tmp2); + tmp0 = emit_data->args[0]; + emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_ternary(bld_base, + TGSI_OPCODE_CMP, + tmp0, bld_base->base.zero, tmp1); + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +static struct lp_build_opcode_action lit_action = { + .fetch_args = lit_fetch_args, + .emit = lit_emit +}; + +/* TGSI_OPCODE_LOG */ + +static void +log_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + + LLVMValueRef abs_x, log_abs_x, flr_log_abs_x, ex2_flr_log_abs_x; + + /* abs( src0.x) */ + abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, + emit_data->args[0] /* src0.x */); + + /* log( abs( src0.x ) ) */ + log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_LG2, + abs_x); + + /* floor( log( abs( src0.x ) ) ) */ + flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, + log_abs_x); + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = flr_log_abs_x; + + /* dst.y */ + ex2_flr_log_abs_x = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_EX2, + flr_log_abs_x); + + /* abs( src0.x ) / 2^( floor( lg2( abs( src0.x ) ) ) ) */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, abs_x, ex2_flr_log_abs_x); + + /* dst.x */ + emit_data->output[TGSI_CHAN_Z] = log_abs_x; + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +static struct lp_build_opcode_action log_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = log_emit +}; + +/* TGSI_OPCODE_LRP */ + +static void +lrp_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, + emit_data->args[1], + emit_data->args[2]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_ternary(bld_base, + TGSI_OPCODE_MAD, emit_data->args[0], tmp, emit_data->args[2]); +} + +/* TGSI_OPCODE_MAD */ + +static void +mad_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef tmp; + tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, + emit_data->args[0], + emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_ADD, tmp, emit_data->args[2]); +} + +/* TGSI_OPCODE_MOV */ + +static void +mov_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = emit_data->args[0]; +} + +/* TGSI_OPCODE_MUL */ +static void +mul_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_POW */ + +static void +pow_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +static struct lp_build_opcode_action pow_action = { + .fetch_args = scalar_binary_fetch_args, + .emit = pow_emit +}; + +/* TGSI_OPCODE_RSQ */ + +static void +rsq_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, + emit_data->args[0]); + if (bld_base->rsq_action.emit) { + bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data); + } else { + emit_data->output[emit_data->chan] = bld_base->base.undef; + } +} + +const struct lp_build_opcode_action rsq_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = rsq_emit + +}; + +/* TGSI_OPCODE_SCS */ +static void +scs_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + /* dst.x */ + emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_COS, emit_data->args[0]); + /* dst.y */ + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base, + TGSI_OPCODE_SIN, emit_data->args[0]); + /* dst.z */ + emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero; + + /* dst.w */ + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_opcode_action scs_action = { + .fetch_args = scalar_unary_fetch_args, + .emit = scs_emit +}; + +/* TGSI_OPCODE_SFL */ + +static void +sfl_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = bld_base->base.zero; +} + +/* TGSI_OPCODE_STR */ + +static void +str_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = bld_base->base.one; +} + +/* TGSI_OPCODE_SUB */ +static void +sub_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = LLVMBuildFSub( + bld_base->base.gallivm->builder, + emit_data->args[0], + emit_data->args[1], ""); +} + +/* TGSI_OPCODE_XPD */ + +static void +xpd_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + dp_fetch_args(bld_base, emit_data, 3); +} + +/** + * (a * b) - (c * d) + */ +static LLVMValueRef +xpd_helper( + struct lp_build_tgsi_context * bld_base, + LLVMValueRef a, + LLVMValueRef b, + LLVMValueRef c, + LLVMValueRef d) +{ + LLVMValueRef tmp0, tmp1; + + tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b); + tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d); + + return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1); +} + +static void +xpd_emit( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[TGSI_CHAN_X] = xpd_helper(bld_base, + emit_data->args[1] /* src0.y */, emit_data->args[5] /* src1.z */, + emit_data->args[4] /* src1.y */, emit_data->args[2] /* src0.z */); + + emit_data->output[TGSI_CHAN_Y] = xpd_helper(bld_base, + emit_data->args[2] /* src0.z */, emit_data->args[3] /* src1.x */, + emit_data->args[5] /* src1.z */, emit_data->args[0] /* src0.x */); + + emit_data->output[TGSI_CHAN_Z] = xpd_helper(bld_base, + emit_data->args[0] /* src0.x */, emit_data->args[4] /* src1.y */, + emit_data->args[3] /* src1.x */, emit_data->args[1] /* src0.y */); + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +const struct lp_build_opcode_action xpd_action = { + .fetch_args = xpd_fetch_args, + .emit = xpd_emit +}; + +void +lp_set_default_actions(struct lp_build_tgsi_context * bld_base) +{ + bld_base->op_actions[TGSI_OPCODE_DP2] = dp2_action; + bld_base->op_actions[TGSI_OPCODE_DP3] = dp3_action; + bld_base->op_actions[TGSI_OPCODE_DP4] = dp4_action; + bld_base->op_actions[TGSI_OPCODE_DP2A] = dp2a_action; + bld_base->op_actions[TGSI_OPCODE_DPH] = dph_action; + bld_base->op_actions[TGSI_OPCODE_DST] = dst_action; + bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action; + bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action; + bld_base->op_actions[TGSI_OPCODE_LOG] = log_action; + bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action; + bld_base->op_actions[TGSI_OPCODE_POW] = pow_action; + bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action; + bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action; + + bld_base->op_actions[TGSI_OPCODE_COS].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args; + bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args; + bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args; + bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args; + + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit; + bld_base->op_actions[TGSI_OPCODE_ARR].emit = arr_emit; + bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = clamp_emit; + bld_base->op_actions[TGSI_OPCODE_END].emit = end_emit; + bld_base->op_actions[TGSI_OPCODE_FRC].emit = frc_emit; + bld_base->op_actions[TGSI_OPCODE_LRP].emit = lrp_emit; + bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit; + bld_base->op_actions[TGSI_OPCODE_MOV].emit = mov_emit; + bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit; + bld_base->op_actions[TGSI_OPCODE_SFL].emit = sfl_emit; + bld_base->op_actions[TGSI_OPCODE_STR].emit = str_emit; + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit; +} + +/* CPU Only default actions */ + +/* These actions are CPU only, because they could potentially output SSE + * intrinsics. + */ + +/* TGSI_OPCODE_ABS (CPU Only)*/ + +static void +abs_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_ADD (CPU Only) */ +static void +add_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_add(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_CEIL (CPU Only) */ +static void +ceil_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_CMP (CPU Only) */ +static void +cmp_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef cond = lp_build_cmp(&bld_base->base, PIPE_FUNC_LESS, + emit_data->args[0], bld_base->base.zero); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + cond, emit_data->args[1], emit_data->args[2]); +} + +/* TGSI_OPCODE_CND (CPU Only) */ +static void +cnd_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef half, tmp; + half = lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type, 0.5); + tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_GREATER, + emit_data->args[2], half); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + tmp, + emit_data->args[0], + emit_data->args[1]); +} + +/* TGSI_OPCODE_COS (CPU Only) */ +static void +cos_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_cos(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_DIV (CPU Only) */ +static void +div_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_div(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_EX2 (CPU Only) */ +static void +ex2_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_exp2(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_EXP (CPU Only) */ +static void +exp_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + lp_build_exp2_approx(&bld_base->base, emit_data->args[0], + &emit_data->output[TGSI_CHAN_X], + &emit_data->output[TGSI_CHAN_Y], + &emit_data->output[TGSI_CHAN_Z]); + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; +} + +/* TGSI_OPCODE_FLR (CPU Only) */ + +static void +flr_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_floor(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_LG2 (CPU Only) */ +static void +lg2_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_log2(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_LOG (CPU Only) */ +static void +log_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMValueRef p_floor_log2; + LLVMValueRef p_exp; + LLVMValueRef p_log2; + LLVMValueRef src0 = emit_data->args[0]; + + lp_build_log2_approx(&bld_base->base, src0, + &p_exp, &p_floor_log2, &p_log2); + + emit_data->output[TGSI_CHAN_X] = p_floor_log2; + + emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, + src0, p_exp); + emit_data->output[TGSI_CHAN_Z] = p_log2; + + emit_data->output[TGSI_CHAN_W] = bld_base->base.one; + +} + +/* TGSI_OPCODE_MAX (CPU Only) */ + +static void +max_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_max(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_MIN (CPU Only) */ +static void +min_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_min(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + +/* TGSI_OPCODE_POW (CPU Only) */ +static void +pow_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_pow(&bld_base->base, + emit_data->args[0], emit_data->args[1]); +} + + +/* TGSI_OPCODE_RCP (CPU Only) */ + +static void +rcp_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_rcp(&bld_base->base, + emit_data->args[0]); +} + +/* Reciprical squareroot (CPU Only) */ + +/* This is not the same as TGSI_OPCODE_RSQ, which requres the argument to be + * greater than or equal to 0 */ +static void +recip_sqrt_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_ROUND (CPU Only) */ +static void +round_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_round(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SET Helper (CPU Only) */ + +static void +set_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, + unsigned pipe_func) +{ + LLVMValueRef cond = lp_build_cmp(&bld_base->base, pipe_func, + emit_data->args[0], emit_data->args[1]); + emit_data->output[emit_data->chan] = lp_build_select(&bld_base->base, + cond, + bld_base->base.one, + bld_base->base.zero); +} + +/* TGSI_OPCODE_SEQ (CPU Only) */ + +static void +seq_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL); +} + +/* TGSI_OPCODE_SGE (CPU Only) */ +static void +sge_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL); +} + +/* TGSI_OPCODE_SGT (CPU Only)*/ + +static void +sgt_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GREATER); +} + +/* TGSI_OPCODE_SIN (CPU Only) */ +static void +sin_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sin(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SLE (CPU Only) */ +static void +sle_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LEQUAL); +} + +/* TGSI_OPCODE_SLT (CPU Only) */ + +static void +slt_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS); +} + +/* TGSI_OPCODE_SNE (CPU Only) */ + +static void +sne_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + set_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL); +} + +/* TGSI_OPCODE_SSG (CPU Only) */ + +static void +ssg_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->base, + emit_data->args[0]); +} + +/* TGSI_OPCODE_SUB (CPU Only) */ + +static void +sub_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base, + emit_data->args[0], + emit_data->args[1]); +} + +/* TGSI_OPCODE_TRUNC (CPU Only) */ + +static void +trunc_emit_cpu( + const struct lp_build_opcode_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + emit_data->output[emit_data->chan] = lp_build_trunc(&bld_base->base, + emit_data->args[0]); +} + +void +lp_set_default_actions_cpu( + struct lp_build_tgsi_context * bld_base) +{ + lp_set_default_actions(bld_base); + bld_base->op_actions[TGSI_OPCODE_ABS].emit = abs_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ADD].emit = add_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ARL].emit = flr_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = round_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = seq_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SGE].emit = sge_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SGT].emit = sgt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SIN].emit = sin_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SLE].emit = sle_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu; + bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu; + + bld_base->rsq_action.emit = recip_sqrt_emit_cpu; +} |