/**************************************************************************
 *
 * Copyright 2012 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "pipe/p_state.h"
#include "util/u_debug.h"

#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_pack.h"

#include "lp_bld_blend.h"


/**
 * Is (a OP b) == (b OP a)?
 */
boolean
lp_build_blend_func_commutative(unsigned func)
{
   switch (func) {
   case PIPE_BLEND_ADD:
   case PIPE_BLEND_MIN:
   case PIPE_BLEND_MAX:
      return TRUE;
   case PIPE_BLEND_SUBTRACT:
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return FALSE;
   default:
      assert(0);
      return TRUE;
   }
}


/**
 * Whether the blending functions are the reverse of each other.
 */
boolean
lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
{
   if (rgb_func == alpha_func)
      return FALSE;
   if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
      return TRUE;
   if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
      return TRUE;
   return FALSE;
}


/**
 * Whether the blending factors are complementary to each other.
 */
static inline boolean
lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
   STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE);
   STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) == PIPE_BLENDFACTOR_INV_CONST_COLOR);
   return dst_factor == (src_factor ^ 0x10);
}


/**
 * Whether this is an inverse blend factor.
 */
static inline boolean
is_inverse_factor(unsigned factor)
{
   STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11);
   return factor > 0x11;
}


/**
 * Calculates the (expanded to wider type) multiplication
 * of 2 normalized numbers.
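 *
 * E.g. for 8-bit normalized inputs the operands are unpacked into the
 * low/high halves of the 16-bit wider type and the per-half products are
 * returned in *resl / *resh.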
 */
static void
lp_build_mul_norm_expand(struct lp_build_context *bld,
                         LLVMValueRef a, LLVMValueRef b,
                         LLVMValueRef *resl, LLVMValueRef *resh,
                         boolean signedness_differs)
{
   const struct lp_type type = bld->type;
   struct lp_type wide_type = lp_wider_type(type);
   struct lp_type wide_type2 = wide_type;
   struct lp_type type2 = type;
   LLVMValueRef al, ah, bl, bh;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));
   assert(!type.floating && !type.fixed && type.norm);

   if (a == bld->zero || b == bld->zero) {
      LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm,
                                                          wide_type));
      *resl = zero;
      *resh = zero;
      return;
   }

   if (signedness_differs) {
      type2.sign = !type.sign;
      wide_type2.sign = !wide_type2.sign;
   }

   lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
   lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh);

   *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
   *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
}


/**
 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
 */
LLVMValueRef
lp_build_blend_func(struct lp_build_context *bld,
                    unsigned func,
                    LLVMValueRef term1,
                    LLVMValueRef term2)
{
   switch (func) {
   case PIPE_BLEND_ADD:
      return lp_build_add(bld, term1, term2);
   case PIPE_BLEND_SUBTRACT:
      return lp_build_sub(bld, term1, term2);
   case PIPE_BLEND_REVERSE_SUBTRACT:
      return lp_build_sub(bld, term2, term1);
   case PIPE_BLEND_MIN:
      return lp_build_min(bld, term1, term2);
   case PIPE_BLEND_MAX:
      return lp_build_max(bld, term1, term2);
   default:
      assert(0);
      return bld->zero;
   }
}


/**
 * Performs optimisations and blending independent of SoA/AoS
 *
 * @param func                  the blend function
 * @param factor_src            PIPE_BLENDFACTOR_xxx
 * @param factor_dst            PIPE_BLENDFACTOR_xxx
 * @param src                   source rgba
 * @param dst                   dest rgba
 * @param src_factor            src factor computed value
 * @param dst_factor            dst factor computed value
 * @param not_alpha_dependent   same factors across all channels of src/dst
 *
 * not_alpha_dependent should be:
 *    SoA: always true as it is only one channel at a time
 *    AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
 *
 * Note that pretty much every possible optimisation can only be done on non-unorm targets
 * due to unorm values not going above 1.0, meaning factorisation can change results.
 * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
 */
LLVMValueRef
lp_build_blend(struct lp_build_context *bld,
               unsigned func,
               unsigned factor_src,
               unsigned factor_dst,
               LLVMValueRef src,
               LLVMValueRef dst,
               LLVMValueRef src_factor,
               LLVMValueRef dst_factor,
               boolean not_alpha_dependent,
               boolean optimise_only)
{
   LLVMValueRef result, src_term, dst_term;

   /* If we are not alpha dependent we can mess with the src/dst factors */
   if (not_alpha_dependent) {
      if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
         if (func == PIPE_BLEND_ADD) {
            if (factor_src < factor_dst) {
               return lp_build_lerp(bld, src_factor, dst, src, 0);
            } else {
               return lp_build_lerp(bld, dst_factor, src, dst, 0);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, result, dst);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, src, result);
            }
         } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
            result = lp_build_add(bld, src, dst);

            if (factor_src < factor_dst) {
               result = lp_build_mul(bld, result, src_factor);
               return lp_build_sub(bld, dst, result);
            } else {
               result = lp_build_mul(bld, result, dst_factor);
               return lp_build_sub(bld, result, src);
            }
         }
      }

      if (bld->type.floating && factor_src == factor_dst) {
         if (func == PIPE_BLEND_ADD ||
             func == PIPE_BLEND_SUBTRACT ||
             func == PIPE_BLEND_REVERSE_SUBTRACT) {
            LLVMValueRef result;
            result = lp_build_blend_func(bld, func, src, dst);
            return lp_build_mul(bld, result, src_factor);
         }
      }
   }

   if (optimise_only)
      return NULL;

   if ((bld->type.norm && bld->type.sign) &&
       (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) {
      /*
       * With snorm blending, the inverse blend factors range from [0,2]
       * instead of [-1,1], so the ordinary signed normalized arithmetic
       * doesn't quite work. Unpack must be unsigned, and the add/sub
       * must be done with wider type.
       * (Note that it's not quite obvious what the blend equation wrt
       * clamping should actually be based on GL spec in this case, but
       * really the incoming src values are clamped to [-1,1] (the dst is
       * always clamped already), and then NO further clamping occurs until
       * the end.)
       */
      struct lp_build_context bldw;
      struct lp_type wide_type = lp_wider_type(bld->type);
      LLVMValueRef src_terml, src_termh, dst_terml, dst_termh;
      LLVMValueRef resl, resh;

      /*
       * We don't need saturate math for the sub/add, since we have
       * x+1 bit numbers in x*2 wide type (result is x+2 bits).
       * (Doesn't really matter on x86 sse2 though as we use saturated
       * intrinsics.)
       */
      wide_type.norm = 0;
      lp_build_context_init(&bldw, bld->gallivm, wide_type);

      /*
       * XXX This is a bit hackish. Note that -128 really should
       * be -1.0, the same as -127. However, we did not actually clamp
       * things anywhere (relying on pack intrinsics instead) therefore
       * we will get -128, and the inverted factor then 255. But the mul
       * can overflow in this case (rather the rounding fixups for the mul,
       * -128*255 will be positive).
       * So we clamp the src and dst up here but only when necessary (we
       * should do this before calculating blend factors but it's enough
       * for avoiding overflow).
       */
      if (is_inverse_factor(factor_src)) {
         src = lp_build_max(bld, src,
                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
      }
      if (is_inverse_factor(factor_dst)) {
         dst = lp_build_max(bld, dst,
                            lp_build_const_vec(bld->gallivm, bld->type, -1.0));
      }

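      /*
       * Compute src*src_factor and dst*dst_factor expanded to the wider
       * type. signedness_differs is set when the factor is an inverse
       * factor, since those are effectively unsigned [0,2] values.
       */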
      lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh,
                               is_inverse_factor(factor_src) ? TRUE : FALSE);
      lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh,
                               is_inverse_factor(factor_dst) ? TRUE : FALSE);

      resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml);
      resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh);

      /*
       * XXX pack2_native is not ok because the values have to be in dst
       * range. We need native pack though for the correct order on avx2.
       * Will break on everything not implementing clamping pack intrinsics
       * (i.e. everything but sse2 and altivec).
       */
      return lp_build_pack2_native(bld->gallivm, wide_type, bld->type,
                                   resl, resh);
   } else {
      src_term = lp_build_mul(bld, src, src_factor);
      dst_term = lp_build_mul(bld, dst, dst_factor);
      return lp_build_blend_func(bld, func, src_term, dst_term);
   }
}


void
lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
                           struct lp_type type,
                           struct lp_build_mask_context *mask,
                           LLVMValueRef alpha,
                           boolean do_branch)
{
   struct lp_build_context bld;
   LLVMValueRef test;
   LLVMValueRef alpha_ref_value;

   lp_build_context_init(&bld, gallivm, type);

   alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);

   test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);

   lp_build_name(test, "alpha_to_coverage");

   lp_build_mask_update(mask, test);

   if (do_branch)
      lp_build_mask_check(mask);
}