diff options
Diffstat (limited to 'src/gallium/drivers/cell/ppu/cell_state_per_fragment.c')
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_per_fragment.c | 1432 |
1 files changed, 0 insertions, 1432 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c deleted file mode 100644 index dc33e7ccc2c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ /dev/null @@ -1,1432 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Generate code to perform all per-fragment operations. - * - * Code generated by these functions perform both alpha, depth, and stencil - * testing as well as alpha blending. - * - * \note - * Occlusion query is not supported, but this is the right place to add that - * support. - * - * \author Ian Romanick <idr@us.ibm.com> - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "cell_context.h" - -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Generate code to perform alpha testing. - * - * The code generated by this function uses the register specificed by - * \c mask as both an input and an output. - * - * \param dsa Current alpha-test state - * \param f Function to which code should be appended - * \param mask Index of register containing active fragment mask - * \param alphas Index of register containing per-fragment alpha values - * - * \note Emits a maximum of 6 instructions. - */ -static void -emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int alphas) -{ - /* If the alpha function is either NEVER or ALWAYS, there is no need to - * load the reference value into a register. ALWAYS is a fairly common - * case, and this optimization saves 2 instructions. - */ - if (dsa->alpha.enabled - && (dsa->alpha.func != PIPE_FUNC_NEVER) - && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - int ref = spe_allocate_available_register(f); - int tmp_a = spe_allocate_available_register(f); - int tmp_b = spe_allocate_available_register(f); - union { - float f; - unsigned u; - } ref_val; - boolean complement = FALSE; - - ref_val.f = dsa->alpha.ref; - - spe_il(f, ref, ref_val.u & 0x0000ffff); - spe_ilh(f, ref, ref_val.u >> 16); - - switch (dsa->alpha.func) { - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_EQUAL: - spe_fceq(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GREATER: - spe_fcgt(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GEQUAL: - spe_fcgt(f, tmp_a, ref, alphas); - spe_fceq(f, tmp_b, ref, alphas); - spe_or(f, tmp_a, tmp_b, tmp_a); - break; - - case PIPE_FUNC_ALWAYS: - case PIPE_FUNC_NEVER: - default: - assert(0); - break; - } - - if (complement) { - spe_andc(f, mask, mask, tmp_a); - } else { - spe_and(f, mask, mask, tmp_a); - } - - spe_release_register(f, ref); - spe_release_register(f, tmp_a); - spe_release_register(f, tmp_b); - } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { - spe_il(f, mask, 0); - } -} - - -/** - * Generate code to perform Z testing. Four Z values are tested at once. - * \param dsa Current depth-test state - * \param f Function to which code should be appended - * \param mask Index of register to contain depth-pass mask - * \param stored Index of register containing values from depth buffer - * \param calculated Index of register containing per-fragment depth values - * - * \return - * If the calculated depth comparison mask is the actual mask, \c FALSE is - * returned. If the calculated depth comparison mask is the compliment of - * the actual mask, \c TRUE is returned. - * - * \note Emits a maximum of 3 instructions. - */ -static boolean -emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int stored, int calculated) -{ - unsigned func = (dsa->depth.enabled) - ? dsa->depth.func : PIPE_FUNC_ALWAYS; - int tmp = spe_allocate_available_register(f); - boolean compliment = FALSE; - - switch (func) { - case PIPE_FUNC_NEVER: - spe_il(f, mask, 0); - break; - - case PIPE_FUNC_NOTEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - spe_ceq(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - spe_clgt(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LESS: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - spe_clgt(f, mask, calculated, stored); - spe_ceq(f, tmp, calculated, stored); - spe_or(f, mask, mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - spe_il(f, mask, ~0); - break; - - default: - assert(0); - break; - } - - spe_release_register(f, tmp); - return compliment; -} - - -/** - * Generate code to apply the stencil operation (after testing). - * \note Emits a maximum of 5 instructions. - * - * \warning - * Since \c out and \c in might be the same register, this routine cannot - * generate code that uses \c out as a temporary. - */ -static void -emit_stencil_op(struct spe_function *f, - int out, int in, int mask, unsigned op, unsigned ref) -{ - const int clamp = spe_allocate_available_register(f); - const int clamp_mask = spe_allocate_available_register(f); - const int result = spe_allocate_available_register(f); - - switch(op) { - case PIPE_STENCIL_OP_KEEP: - assert(0); - case PIPE_STENCIL_OP_ZERO: - spe_il(f, result, 0); - break; - case PIPE_STENCIL_OP_REPLACE: - spe_il(f, result, ref); - break; - case PIPE_STENCIL_OP_INCR: - /* clamp = [0xff, 0xff, 0xff, 0xff] */ - spe_il(f, clamp, 0x0ff); - /* result[i] = in[i] + 1 */ - spe_ai(f, result, in, 1); - /* clamp_mask[i] = (result[i] > 0xff) */ - spe_clgti(f, clamp_mask, result, 0x0ff); - /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_DECR: - spe_il(f, clamp, 0); - spe_ai(f, result, in, -1); - - /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned - * arithmetic. - */ - spe_clgti(f, clamp_mask, result, 0x0ff); - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_INCR_WRAP: - spe_ai(f, result, in, 1); - break; - case PIPE_STENCIL_OP_DECR_WRAP: - spe_ai(f, result, in, -1); - break; - case PIPE_STENCIL_OP_INVERT: - spe_nor(f, result, in, in); - break; - default: - assert(0); - } - - spe_selb(f, out, in, result, mask); - - spe_release_register(f, result); - spe_release_register(f, clamp_mask); - spe_release_register(f, clamp); -} - - -/** - * Generate code to do stencil test. Four pixels are tested at once. - * \param dsa Depth / stencil test state - * \param face 0 for front face, 1 for back face - * \param f Function to append instructions to - * \param mask Register containing mask of fragments passing the - * alpha test - * \param depth_mask Register containing mask of fragments passing the - * depth test - * \param depth_compliment Is \c depth_mask the compliment of the actual mask? - * \param stencil Register containing values from stencil buffer - * \param depth_pass Register to store mask of fragments passing stencil test - * and depth test - * - * \note - * Emits a maximum of 10 + (3 * 5) = 25 instructions. - */ -static int -emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, - struct pipe_stencil_ref *sr, - unsigned face, - struct spe_function *f, - int mask, - int depth_mask, - boolean depth_complement, - int stencil, - int depth_pass) -{ - int stencil_fail = spe_allocate_available_register(f); - int depth_fail = spe_allocate_available_register(f); - int stencil_mask = spe_allocate_available_register(f); - int stencil_pass = spe_allocate_available_register(f); - int face_stencil = spe_allocate_available_register(f); - int stencil_src = stencil; - const unsigned ref = (sr->ref_value[face] - & dsa->stencil[face].valuemask); - boolean complement = FALSE; - int stored; - int tmp = spe_allocate_available_register(f); - - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].valuemask != 0x0ff)) { - stored = spe_allocate_available_register(f); - spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); - } else { - stored = stencil; - } - - - switch (dsa->stencil[face].func) { - case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ - break; - - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - /* stencil_mask[i] = (stored[i] == ref) */ - spe_ceqi(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - complement = TRUE; - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - /* tmp[i] = (stored[i] == ref) */ - spe_ceqi(f, tmp, stored, ref); - /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ - spe_or(f, stencil_mask, stencil_mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - /* See comment below. */ - break; - - default: - assert(0); - break; - } - - if (stored != stencil) { - spe_release_register(f, stored); - } - spe_release_register(f, tmp); - - - /* ALWAYS is a very common stencil-test, so some effort is applied to - * optimize that case. The stencil-pass mask is the same as the input - * fragment mask. This makes the stencil-test (above) a no-op, and the - * input fragment mask can be "renamed" the stencil-pass mask. - */ - if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { - spe_release_register(f, stencil_pass); - stencil_pass = mask; - } else { - if (complement) { - spe_andc(f, stencil_pass, mask, stencil_mask); - } else { - spe_and(f, stencil_pass, mask, stencil_mask); - } - } - - if (depth_complement) { - spe_andc(f, depth_pass, stencil_pass, depth_mask); - } else { - spe_and(f, depth_pass, stencil_pass, depth_mask); - } - - - /* Conditionally emit code to update the stencil value under various - * condititons. Note that there is no need to generate code under the - * following circumstances: - * - * - Stencil write mask is zero. - * - For stencil-fail if the stencil test is ALWAYS - * - For depth-fail if the stencil test is NEVER - * - For depth-pass if the stencil test is NEVER - * - Any of the 3 conditions if the operation is KEEP - */ - if (dsa->stencil[face].writemask != 0) { - if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { - if (complement) { - spe_and(f, stencil_fail, mask, stencil_mask); - } else { - spe_andc(f, stencil_fail, mask, stencil_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, - dsa->stencil[face].fail_op, - sr->ref_value[face]); - - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { - if (depth_complement) { - spe_and(f, depth_fail, stencil_pass, depth_mask); - } else { - spe_andc(f, depth_fail, stencil_pass, depth_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, depth_fail, - dsa->stencil[face].zfail_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { - emit_stencil_op(f, face_stencil, stencil_src, depth_pass, - dsa->stencil[face].zpass_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - } - - spe_release_register(f, stencil_fail); - spe_release_register(f, depth_fail); - spe_release_register(f, stencil_mask); - if (stencil_pass != mask) { - spe_release_register(f, stencil_pass); - } - - /* If all of the stencil operations were KEEP or the stencil write mask was - * zero, "stencil_src" will still be set to "stencil". In this case - * release the "face_stencil" register. Otherwise apply the stencil write - * mask to select bits from the calculated stencil value and the previous - * stencil value. - */ - if (stencil_src == stencil) { - spe_release_register(f, face_stencil); - } else if (dsa->stencil[face].writemask != 0x0ff) { - int tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, dsa->stencil[face].writemask); - spe_selb(f, stencil_src, stencil, stencil_src, tmp); - - spe_release_register(f, tmp); - } - - return stencil_src; -} - - -void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa, - struct pipe_stencil_ref *sr) -{ - struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; - struct spe_function *const f = &cdsa->code; - - /* This code generates a maximum of 6 (alpha test) + 3 (depth test) - * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round - * up to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Allocate registers for the function's input parameters. Cleverly (and - * clever code is usually dangerous, but I couldn't resist) the generated - * function returns a structure. Returned structures start with register - * 3, and the structure fields are ordered to match up exactly with the - * input parameters. - */ - int mask = spe_allocate_register(f, 3); - int depth = spe_allocate_register(f, 4); - int stencil = spe_allocate_register(f, 5); - int zvals = spe_allocate_register(f, 6); - int frag_a = spe_allocate_register(f, 7); - int facing = spe_allocate_register(f, 8); - - int depth_mask = spe_allocate_available_register(f); - - boolean depth_complement; - - - emit_alpha_test(dsa, f, mask, frag_a); - - depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); - - if (dsa->stencil[0].enabled) { - const int front_depth_pass = spe_allocate_available_register(f); - int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask, - depth_mask, depth_complement, - stencil, front_depth_pass); - - if (dsa->stencil[1].enabled) { - const int back_depth_pass = spe_allocate_available_register(f); - int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask, - depth_mask, depth_complement, - stencil, back_depth_pass); - - /* If the front facing stencil value and the back facing stencil - * value are stored in the same register, there is no need to select - * a value based on the facing. This can happen if the stencil value - * was not modified due to the write masks being zero, the stencil - * operations being KEEP, etc. - */ - if (front_stencil != back_stencil) { - spe_selb(f, stencil, back_stencil, front_stencil, facing); - } - - if (back_stencil != stencil) { - spe_release_register(f, back_stencil); - } - - if (front_stencil != stencil) { - spe_release_register(f, front_stencil); - } - - spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); - - spe_release_register(f, back_depth_pass); - } else { - if (front_stencil != stencil) { - spe_or(f, stencil, front_stencil, front_stencil); - spe_release_register(f, front_stencil); - } - spe_or(f, mask, front_depth_pass, front_depth_pass); - } - - spe_release_register(f, front_depth_pass); - } else if (dsa->depth.enabled) { - if (depth_complement) { - spe_andc(f, mask, mask, depth_mask); - } else { - spe_and(f, mask, mask, depth_mask); - } - } - - if (dsa->depth.writemask) { - spe_selb(f, depth, depth, zvals, mask); - } - - spe_bi(f, 0, 0, 0); /* return from function call */ - - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# alpha (%sabled)\n", - (dsa->alpha.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->alpha.func); - printf("# ref: %.2f\n", dsa->alpha.ref); - - printf("# depth (%sabled)\n", - (dsa->depth.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->depth.func); - - for (i = 0; i < 2; i++) { - printf("# %s stencil (%sabled)\n", - (i == 0) ? "front" : "back", - (dsa->stencil[i].enabled) ? "en" : "dis"); - - printf("# func: %u\n", dsa->stencil[i].func); - printf("# op (sf, zf, zp): %u %u %u\n", - dsa->stencil[i].fail_op, - dsa->stencil[i].zfail_op, - dsa->stencil[i].zpass_op); - printf("# ref value / value mask / write mask: %02x %02x %02x\n", - sr->ref_value[i], - dsa->stencil[i].valuemask, - dsa->stencil[i].writemask); - } - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -/** - * \note Emits a maximum of 3 instructions - */ -static int -emit_alpha_factor_calculation(struct spe_function *f, - unsigned factor, - int src_alpha, int dst_alpha, int const_alpha) -{ - int factor_reg; - int tmp; - - - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor_reg = spe_allocate_available_register(f); - - spe_or(f, factor_reg, src_alpha, src_alpha); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor_reg = dst_alpha; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor_reg = spe_allocate_available_register(f); - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, const_alpha); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor_reg = const_alpha; - break; - - case PIPE_BLENDFACTOR_ZERO: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, src_alpha); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, dst_alpha); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - factor_reg = -1; - break; - } - - return factor_reg; -} - - -/** - * \note Emits a maximum of 6 instructions - */ -static void -emit_color_factor_calculation(struct spe_function *f, - unsigned sF, unsigned mask, - const int *src, - const int *dst, - const int *const_color, - int *factor) -{ - int tmp; - unsigned i; - - - factor[0] = -1; - factor[1] = -1; - factor[2] = -1; - factor[3] = -1; - - switch (sF) { - case PIPE_BLENDFACTOR_ONE: - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_or(f, factor[i], src[i], src[i]); - } - } - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_or(f, factor[0], src[3], src[3]); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor[0] = dst[3]; - factor[1] = dst[3]; - factor[2] = dst[3]; - break; - - case PIPE_BLENDFACTOR_DST_COLOR: - factor[0] = dst[0]; - factor[1] = dst[1]; - factor[2] = dst[2]; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - /* Alpha saturate means min(As, 1-Ad). - */ - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, tmp, tmp, dst[3]); - spe_fcgt(f, factor[0], tmp, src[3]); - spe_selb(f, factor[0], src[3], tmp, factor[0]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; i++) { - factor[i] = spe_allocate_available_register(f); - - spe_fs(f, factor[i], tmp, const_color[i]); - } - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_COLOR: - for (i = 0; i < 3; i++) { - factor[i] = const_color[i]; - } - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, const_color[3]); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor[0] = const_color[3]; - factor[1] = factor[0]; - factor[2] = factor[0]; - break; - - case PIPE_BLENDFACTOR_ZERO: - break; - - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, src[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, src[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, dst[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, dst[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - } -} - - -static void -emit_blend_calculation(struct spe_function *f, - unsigned func, unsigned sF, unsigned dF, - int src, int src_factor, int dst, int dst_factor) -{ - int tmp = spe_allocate_available_register(f); - - switch (func) { - case PIPE_BLEND_ADD: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. */ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fa(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fma(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. */ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fms(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_REVERSE_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, src); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, dst, src); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, src, src_factor); - spe_fms(f, src, src, dst_factor, tmp); - } - break; - - case PIPE_BLEND_MIN: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, src, dst, tmp); - break; - - case PIPE_BLEND_MAX: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, dst, src, tmp); - break; - - default: - assert(0); - } - - spe_release_register(f, tmp); -} - - -/** - * Generate code to perform alpha blending on the SPE - */ -void -cell_generate_alpha_blend(struct cell_blend_state *cb) -{ - struct pipe_blend_state *const b = &cb->base; - struct spe_function *const f = &cb->code; - - /* This code generates a maximum of 3 (source alpha factor) - * + 3 (destination alpha factor) + (3 * 6) (source color factor) - * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) - * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to - * make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - const int frag[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - const int pixel[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - const int const_color[4] = { - spe_allocate_register(f, 11), - spe_allocate_register(f, 12), - spe_allocate_register(f, 13), - spe_allocate_register(f, 14), - }; - unsigned func[4]; - unsigned sF[4]; - unsigned dF[4]; - unsigned i; - int src_factor[4]; - int dst_factor[4]; - - - /* Does the selected blend mode make use of the source / destination - * color (RGB) blend factors? - */ - boolean need_color_factor = b->rt[0].blend_enable - && (b->rt[0].rgb_func != PIPE_BLEND_MIN) - && (b->rt[0].rgb_func != PIPE_BLEND_MAX); - - /* Does the selected blend mode make use of the source / destination - * alpha blend factors? - */ - boolean need_alpha_factor = b->rt[0].blend_enable - && (b->rt[0].alpha_func != PIPE_BLEND_MIN) - && (b->rt[0].alpha_func != PIPE_BLEND_MAX); - - - if (b->rt[0].blend_enable) { - sF[0] = b->rt[0].rgb_src_factor; - sF[1] = sF[0]; - sF[2] = sF[0]; - switch (b->rt[0].alpha_src_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - sF[3] = PIPE_BLENDFACTOR_ONE; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - sF[3] = b->rt[0].alpha_src_factor + 1; - break; - default: - sF[3] = b->rt[0].alpha_src_factor; - } - - dF[0] = b->rt[0].rgb_dst_factor; - dF[1] = dF[0]; - dF[2] = dF[0]; - switch (b->rt[0].alpha_dst_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - dF[3] = b->rt[0].alpha_dst_factor + 1; - break; - default: - dF[3] = b->rt[0].alpha_dst_factor; - } - - func[0] = b->rt[0].rgb_func; - func[1] = func[0]; - func[2] = func[0]; - func[3] = b->rt[0].alpha_func; - } else { - sF[0] = PIPE_BLENDFACTOR_ONE; - sF[1] = PIPE_BLENDFACTOR_ONE; - sF[2] = PIPE_BLENDFACTOR_ONE; - sF[3] = PIPE_BLENDFACTOR_ONE; - dF[0] = PIPE_BLENDFACTOR_ZERO; - dF[1] = PIPE_BLENDFACTOR_ZERO; - dF[2] = PIPE_BLENDFACTOR_ZERO; - dF[3] = PIPE_BLENDFACTOR_ZERO; - - func[0] = PIPE_BLEND_ADD; - func[1] = PIPE_BLEND_ADD; - func[2] = PIPE_BLEND_ADD; - func[3] = PIPE_BLEND_ADD; - } - - - /* If alpha writing is enabled and the alpha blend mode requires use of - * the alpha factor, calculate the alpha factor. - */ - if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { - src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], - frag[3], pixel[3]); - - /* If the alpha destination blend factor is the same as the alpha source - * blend factor, re-use the previously calculated value. - */ - dst_factor[3] = (dF[3] == sF[3]) - ? src_factor[3] - : emit_alpha_factor_calculation(f, dF[3], const_color[3], - frag[3], pixel[3]); - } - - - if (sF[0] == sF[3]) { - src_factor[0] = src_factor[3]; - src_factor[1] = src_factor[3]; - src_factor[2] = src_factor[3]; - } else if (sF[0] == dF[3]) { - src_factor[0] = dst_factor[3]; - src_factor[1] = dst_factor[3]; - src_factor[2] = dst_factor[3]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_src_factor, - b->rt[0].colormask, - frag, pixel, const_color, src_factor); - } - - - if (dF[0] == sF[3]) { - dst_factor[0] = src_factor[3]; - dst_factor[1] = src_factor[3]; - dst_factor[2] = src_factor[3]; - } else if (dF[0] == dF[3]) { - dst_factor[0] = dst_factor[3]; - dst_factor[1] = dst_factor[3]; - dst_factor[2] = dst_factor[3]; - } else if (dF[0] == sF[0]) { - dst_factor[0] = src_factor[0]; - dst_factor[1] = src_factor[1]; - dst_factor[2] = src_factor[2]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_dst_factor, - b->rt[0].colormask, - frag, pixel, const_color, dst_factor); - } - - - - for (i = 0; i < 4; ++i) { - if ((b->rt[0].colormask & (1U << i)) != 0) { - emit_blend_calculation(f, - func[i], sF[i], dF[i], - frag[i], src_factor[i], - pixel[i], dst_factor[i]); - } - } - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - - printf("# %u instructions\n", f->csr - f->store); - printf("# blend (%sabled)\n", - (cb->base.blend_enable) ? "en" : "dis"); - printf("# RGB func / sf / df: %u %u %u\n", - cb->base.rgb_func, - cb->base.rgb_src_factor, - cb->base.rgb_dst_factor); - printf("# ALP func / sf / df: %u %u %u\n", - cb->base.alpha_func, - cb->base.alpha_src_factor, - cb->base.alpha_dst_factor); - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -static int -PC_OFFSET(const struct spe_function *f, const void *d) -{ - const intptr_t pc = (intptr_t) &f->store[f->num_inst]; - const intptr_t ea = ~0x0f & (intptr_t) d; - - return (ea - pc) >> 2; -} - - -/** - * Generate code to perform color conversion and logic op - * - * \bug - * The code generated by this function should also perform dithering. - * - * \bug - * The code generated by this function should also perform color-write - * masking. - * - * \bug - * Only two framebuffer formats are supported at this time. - */ -void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf) -{ - const unsigned logic_op = (blend->logicop_enable) - ? blend->logicop_func : PIPE_LOGICOP_COPY; - - /* This code generates a maximum of 37 instructions. An additional 32 - * bytes (equiv. to 8 instructions) are needed for data storage. Round up - * to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Pixel colors in framebuffer format in AoS layout. - */ - const int pixel[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - - /* Fragment colors stored as floats in SoA layout. - */ - const int frag[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - - const int mask = spe_allocate_register(f, 11); - - - /* Short-circuit the noop and invert cases. - */ - if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { - spe_bi(f, 0, 0, 0); - return; - } else if (logic_op == PIPE_LOGICOP_INVERT) { - spe_nor(f, pixel[0], pixel[0], pixel[0]); - spe_nor(f, pixel[1], pixel[1], pixel[1]); - spe_nor(f, pixel[2], pixel[2], pixel[2]); - spe_nor(f, pixel[3], pixel[3], pixel[3]); - spe_bi(f, 0, 0, 0); - return; - } - - - const int tmp[4] = { - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - }; - - const int shuf_xpose_hi = spe_allocate_available_register(f); - const int shuf_xpose_lo = spe_allocate_available_register(f); - const int shuf_color = spe_allocate_available_register(f); - - - /* Pointer to the begining of the function's private data area. - */ - uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); - - - /* Convert fragment colors to framebuffer format in AoS layout. - */ - switch (surf->format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - data[0] = 0x00010203; - data[1] = 0x10111213; - data[2] = 0x04050607; - data[3] = 0x14151617; - data[4] = 0x0c000408; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - data[0] = 0x03020100; - data[1] = 0x13121110; - data[2] = 0x07060504; - data[3] = 0x17161514; - data[4] = 0x0804000c; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - default: - fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); - ASSERT(0); - } - - spe_ilh(f, tmp[0], 0x0808); - spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); - spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); - spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); - - spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); - spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); - spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); - spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); - - spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); - spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); - spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); - spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); - - spe_cfltu(f, frag[0], frag[0], 32); - spe_cfltu(f, frag[1], frag[1], 32); - spe_cfltu(f, frag[2], frag[2], 32); - spe_cfltu(f, frag[3], frag[3], 32); - - spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); - spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); - spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); - spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); - - - /* If logic op is enabled, perform the requested logical operation on the - * converted fragment colors and the pixel colors. - */ - switch (logic_op) { - case PIPE_LOGICOP_CLEAR: - spe_il(f, frag[0], 0); - spe_il(f, frag[1], 0); - spe_il(f, frag[2], 0); - spe_il(f, frag[3], 0); - break; - case PIPE_LOGICOP_NOR: - spe_nor(f, frag[0], frag[0], pixel[0]); - spe_nor(f, frag[1], frag[1], pixel[1]); - spe_nor(f, frag[2], frag[2], pixel[2]); - spe_nor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND_INVERTED: - spe_andc(f, frag[0], pixel[0], frag[0]); - spe_andc(f, frag[1], pixel[1], frag[1]); - spe_andc(f, frag[2], pixel[2], frag[2]); - spe_andc(f, frag[3], pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY_INVERTED: - spe_nor(f, frag[0], frag[0], frag[0]); - spe_nor(f, frag[1], frag[1], frag[1]); - spe_nor(f, frag[2], frag[2], frag[2]); - spe_nor(f, frag[3], frag[3], frag[3]); - break; - case PIPE_LOGICOP_AND_REVERSE: - spe_andc(f, frag[0], frag[0], pixel[0]); - spe_andc(f, frag[1], frag[1], pixel[1]); - spe_andc(f, frag[2], frag[2], pixel[2]); - spe_andc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_XOR: - spe_xor(f, frag[0], frag[0], pixel[0]); - spe_xor(f, frag[1], frag[1], pixel[1]); - spe_xor(f, frag[2], frag[2], pixel[2]); - spe_xor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_NAND: - spe_nand(f, frag[0], frag[0], pixel[0]); - spe_nand(f, frag[1], frag[1], pixel[1]); - spe_nand(f, frag[2], frag[2], pixel[2]); - spe_nand(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND: - spe_and(f, frag[0], frag[0], pixel[0]); - spe_and(f, frag[1], frag[1], pixel[1]); - spe_and(f, frag[2], frag[2], pixel[2]); - spe_and(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_EQUIV: - spe_eqv(f, frag[0], frag[0], pixel[0]); - spe_eqv(f, frag[1], frag[1], pixel[1]); - spe_eqv(f, frag[2], frag[2], pixel[2]); - spe_eqv(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR_INVERTED: - spe_orc(f, frag[0], pixel[0], frag[0]); - spe_orc(f, frag[1], pixel[1], frag[1]); - spe_orc(f, frag[2], pixel[2], frag[2]); - spe_orc(f, frag[3], pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY: - break; - case PIPE_LOGICOP_OR_REVERSE: - spe_orc(f, frag[0], frag[0], pixel[0]); - spe_orc(f, frag[1], frag[1], pixel[1]); - spe_orc(f, frag[2], frag[2], pixel[2]); - spe_orc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR: - spe_or(f, frag[0], frag[0], pixel[0]); - spe_or(f, frag[1], frag[1], pixel[1]); - spe_or(f, frag[2], frag[2], pixel[2]); - spe_or(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_SET: - spe_il(f, frag[0], ~0); - spe_il(f, frag[1], ~0); - spe_il(f, frag[2], ~0); - spe_il(f, frag[3], ~0); - break; - - /* These two cases are short-circuited above. - */ - case PIPE_LOGICOP_INVERT: - case PIPE_LOGICOP_NOOP: - default: - assert(0); - } - - - /* Apply fragment mask. - */ - spe_ilh(f, tmp[0], 0x0000); - spe_ilh(f, tmp[1], 0x0404); - spe_ilh(f, tmp[2], 0x0808); - spe_ilh(f, tmp[3], 0x0c0c); - - spe_shufb(f, tmp[0], mask, mask, tmp[0]); - spe_shufb(f, tmp[1], mask, mask, tmp[1]); - spe_shufb(f, tmp[2], mask, mask, tmp[2]); - spe_shufb(f, tmp[3], mask, mask, tmp[3]); - - spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); - spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); - spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); - spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# %u instructions\n", f->csr - f->store); - - printf("\t.text\n"); - for (i = 0; i < 64; i++) { - printf("\t.long\t0x%04x\n", p[i]); - } - fflush(stdout); - } -#endif -} |