diff options
Diffstat (limited to 'src/gallium/drivers')
27 files changed, 1058 insertions, 1019 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index f6973b54d2c..21cd3cf2ed2 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -4,4 +4,3 @@ lp_test_blend lp_test_conv lp_test_format lp_test_printf -lp_test_round diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 26fbde9a169..ef16fc7d882 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -55,8 +55,7 @@ PROGS := lp_test_format \ lp_test_arit \ lp_test_blend \ lp_test_conv \ - lp_test_printf \ - lp_test_round + lp_test_printf # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 85560a1c716..cea44a78679 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -94,7 +94,6 @@ if not env['embedded']: if not env['msvc']: tests.append('arit') - tests.append('round') for test in tests: testname = 'lp_test_' + test diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 87a6a2751d4..8efa75c01d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -59,6 +59,7 @@ #include "pipe/p_state.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_arit.h" @@ -102,7 +103,16 @@ lp_build_stencil_test_single(struct lp_build_context *bld, struct lp_type type = bld->type; LLVMValueRef res; - assert(type.sign); + /* + * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values + * are between 0..255 so ensure we generate the fastest comparisons for + * wider elements. 
+ */ + if (type.width <= 8) { + assert(!type.sign); + } else { + assert(type.sign); + } assert(stencil->enabled); @@ -424,29 +434,86 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, LLVMBuilderRef builder = gallivm->builder; LLVMContextRef context = gallivm->context; LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); - LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); - LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16); - LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); - LLVMValueRef maskarray[4] = { - lp_build_const_int32(gallivm, 0), - lp_build_const_int32(gallivm, 4), - lp_build_const_int32(gallivm, 8), - lp_build_const_int32(gallivm, 12) - }; - LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); - LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); - LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle"); - LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle); - LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); - LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); - LLVMBuildStore(builder, incr, counter); + LLVMValueRef count, newcount; + + assert(type.length <= 16); + assert(type.floating); + + if(util_cpu_caps.has_sse && type.length == 4) { + const char *movmskintr = "llvm.x86.sse.movmsk.ps"; + const char *popcntintr = "llvm.ctpop.i32"; + LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, + lp_build_vec_type(gallivm, type), ""); + bits = lp_build_intrinsic_unary(builder, movmskintr, + LLVMInt32TypeInContext(context), bits); + count = lp_build_intrinsic_unary(builder, popcntintr, + LLVMInt32TypeInContext(context), bits); + } + else if(util_cpu_caps.has_avx && type.length == 8) { + const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; + const char *popcntintr = 
"llvm.ctpop.i32"; + LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, + lp_build_vec_type(gallivm, type), ""); + bits = lp_build_intrinsic_unary(builder, movmskintr, + LLVMInt32TypeInContext(context), bits); + count = lp_build_intrinsic_unary(builder, popcntintr, + LLVMInt32TypeInContext(context), bits); + } + else { + unsigned i; + LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); + LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); + LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); + LLVMValueRef shufflev, countd; + LLVMValueRef shuffles[16]; + const char *popcntintr = NULL; + + countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); + + for (i = 0; i < type.length; i++) { + shuffles[i] = lp_build_const_int32(gallivm, 4*i); + } + + shufflev = LLVMConstVector(shuffles, type.length); + countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); + countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); + + /* + * XXX FIXME + * this is bad on cpus without popcount (on x86 supported by intel + * nehalem, amd barcelona, and up - not tied to sse42). + * Would be much faster to just sum the 4 elements of the vector with + * some horizontal add (shuffle/add/shuffle/add after the initial and). + */ + switch (type.length) { + case 4: + popcntintr = "llvm.ctpop.i32"; + break; + case 8: + popcntintr = "llvm.ctpop.i64"; + break; + case 16: + popcntintr = "llvm.ctpop.i128"; + break; + default: + assert(0); + } + count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); + + if (type.length > 4) { + count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), ""); + } + } + newcount = LLVMBuildLoad(builder, counter, "origcount"); + newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); + LLVMBuildStore(builder, newcount, counter); } /** * Generate code for performing depth and/or stencil tests. 
- * We operate on a vector of values (typically a 2x2 quad). + * We operate on a vector of values (typically n 2x2 quads). * * \param depth the depth test state * \param stencil the front/back stencil state @@ -454,9 +521,9 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) + * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains boolean value indicating front/back facing polygon + * \param face contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(struct gallivm_state *gallivm, @@ -507,6 +574,12 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, assert(z_type.width == z_src_type.width); assert(z_type.length == z_src_type.length); + /* FIXME: for non-float depth/stencil might generate better code + * if we'd always split it up to use 128bit operations. + * For stencil we'd almost certainly want to pack to 8xi16 values, + * for z just run twice. 
+ */ + /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; @@ -548,7 +621,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, lp_build_context_init(&z_bld, gallivm, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(z_type.width); + s_type = lp_int_type(z_type); lp_build_context_init(&s_bld, gallivm, s_type); /* Load current z/stencil value from z/stencil buffer */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 0d51ccb0349..d108f35f719 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -61,6 +61,9 @@ * # | # | # * ################# * + * If we iterate over multiple quads at once, quads 01 and 23 are processed + * together. + * * Within each quad, we have four pixels which are represented in SOA * order: * @@ -72,6 +75,10 @@ * * So the green channel (for example) of the four pixels is stored in * a single vector register: {g0, g1, g2, g3}. + * The order stays the same even with multiple quads: + * 0 1 4 5 + * 2 3 6 7 + * is stored as g0..g7 */ @@ -102,8 +109,8 @@ #define PERSPECTIVE_DIVIDE_PER_QUAD 0 -static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; -static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; +static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3}; +static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; static void @@ -115,132 +122,353 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); } - -/** - * Initialize the bld->a0, dadx, dady fields. This involves fetching - * those values from the arrays which are passed into the JIT function. 
+/* Much easier, and significantly less instructions in the per-stamp + * part (less than half) but overall more instructions so a loss if + * most quads are active. Might be a win though with larger vectors. + * No ability to do per-quad divide (doable but not implemented) + * Could be made to work with passed in pixel offsets (i.e. active quad merging). */ static void -coeffs_init(struct lp_build_interp_soa_context *bld, - LLVMValueRef a0_ptr, - LLVMValueRef dadx_ptr, - LLVMValueRef dady_ptr) +coeffs_init_simple(struct lp_build_interp_soa_context *bld, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr) { struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; struct gallivm_state *gallivm = coeff_bld->gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type); - LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0); - LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); - LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); - LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); - LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); unsigned attrib; - unsigned chan; - - /* TODO: Use more vector operations */ for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + /* + * always fetch all 4 values for performance/simplicity + * Note: we do that here because it seems to generate better + * code. It generates a lot of moves initially but less + * moves later. As far as I can tell this looks like a + * llvm issue: instead of simply reloading the values from + * the passed in pointers if it runs out of registers, + * it spills/reloads them. Maybe some optimization passes + * would help. + * Might want to investigate this again later. 
+ */ + const unsigned interp = bld->interp[attrib]; + LLVMValueRef index = lp_build_const_int32(gallivm, + attrib * TGSI_NUM_CHANNELS); + LLVMValueRef ptr; + LLVMValueRef dadxaos = setup_bld->zero; + LLVMValueRef dadyaos = setup_bld->zero; + LLVMValueRef a0aos = setup_bld->zero; + + switch (interp) { + case LP_INTERP_PERSPECTIVE: + /* fall-through */ + + case LP_INTERP_LINEAR: + ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadxaos = LLVMBuildLoad(builder, ptr, ""); + + ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadyaos = LLVMBuildLoad(builder, ptr, ""); + + attrib_name(dadxaos, attrib, 0, ".dadxaos"); + attrib_name(dadyaos, attrib, 0, ".dadyaos"); + /* fall-through */ + + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: + ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + a0aos = LLVMBuildLoad(builder, ptr, ""); + attrib_name(a0aos, attrib, 0, ".a0aos"); + break; + + case LP_INTERP_POSITION: + /* Nothing to do as the position coeffs are already setup in slot 0 */ + continue; + + default: + assert(0); + break; + } + bld->a0aos[attrib] = a0aos; + bld->dadxaos[attrib] = dadxaos; + bld->dadyaos[attrib] = dadyaos; + } +} + +/** + * Interpolate the shader input attribute values. + * This is called for each (group of) quad(s). 
+ */ +static void +attribs_update_simple(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + int quad_start_index, + int start, + int end) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; + LLVMValueRef oow = NULL; + unsigned attrib, i; + LLVMValueRef pixoffx; + LLVMValueRef pixoffy; + unsigned num_pix = coeff_bld->type.length; + + /* could do this with code-generated passed in pixel offsets */ + pixoffx = coeff_bld->undef; + pixoffy = coeff_bld->undef; + for (i = 0; i < coeff_bld->type.length; i++) { + LLVMValueRef nr = lp_build_const_int32(gallivm, i); + LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + + (quad_start_index & 1) * 2); + LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + + (quad_start_index & 2)); + pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); + pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); + } + + pixoffx = LLVMBuildFAdd(builder, pixoffx, + lp_build_broadcast_scalar(coeff_bld, bld->x), ""); + pixoffy = LLVMBuildFAdd(builder, pixoffy, + lp_build_broadcast_scalar(coeff_bld, bld->y), ""); + + for (attrib = start; attrib < end; attrib++) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + unsigned chan; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { if (mask & (1 << chan)) { - LLVMValueRef index = lp_build_const_int32(gallivm, - attrib * TGSI_NUM_CHANNELS + chan); - LLVMValueRef a0 = zero; - LLVMValueRef dadx = zero; - LLVMValueRef dady = zero; - LLVMValueRef dadxy = zero; - LLVMValueRef dadq; - LLVMValueRef dadq2; - LLVMValueRef a; + LLVMValueRef index; + LLVMValueRef dadx = coeff_bld->zero; + LLVMValueRef dady = coeff_bld->zero; + LLVMValueRef a = coeff_bld->zero; + index = lp_build_const_int32(gallivm, chan); switch 
(interp) { case LP_INTERP_PERSPECTIVE: /* fall-through */ case LP_INTERP_LINEAR: if (attrib == 0 && chan == 0) { - dadxy = dadx = one; + dadx = coeff_bld->one; } else if (attrib == 0 && chan == 1) { - dadxy = dady = one; + dady = coeff_bld->one; } else { - dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), ""); - dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), ""); - dadxy = LLVMBuildFAdd(builder, dadx, dady, ""); - attrib_name(dadx, attrib, chan, ".dadx"); - attrib_name(dady, attrib, chan, ".dady"); - attrib_name(dadxy, attrib, chan, ".dadxy"); + dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->dadxaos[attrib], + index); + dady = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->dadyaos[attrib], + index); + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->a0aos[attrib], + index); } - /* fall-through */ + /* + * a = a0 + (x * dadx + y * dady) + */ + dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); + dady = LLVMBuildFMul(builder, dady, pixoffy, ""); + a = LLVMBuildFAdd(builder, a, dadx, ""); + a = LLVMBuildFAdd(builder, a, dady, ""); + + if (interp == LP_INTERP_PERSPECTIVE) { + if (oow == NULL) { + LLVMValueRef w = bld->attribs[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + oow = lp_build_rcp(coeff_bld, w); + } + a = lp_build_mul(coeff_bld, a, oow); + } + break; case LP_INTERP_CONSTANT: case LP_INTERP_FACING: - a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), ""); - attrib_name(a0, attrib, chan, ".a0"); + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, bld->a0aos[attrib], + index); break; case LP_INTERP_POSITION: - /* Nothing to do as the position coeffs are already setup in slot 0 */ - continue; + assert(attrib > 0); + a = bld->attribs[0][chan]; + break; default: assert(0); break; } - /* - * dadq = {0, dadx, dady, dadx + dady} - */ + if 
((attrib == 0) && (chan == 2)){ + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + bld->attribs[attrib][chan] = a; + } + } + } +} - dadq = coeff_bld->undef; - dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, ""); - dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, ""); - dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, ""); - dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, ""); +/** + * Initialize the bld->a, dadq fields. This involves fetching + * those values from the arrays which are passed into the JIT function. + */ +static void +coeffs_init(struct lp_build_interp_soa_context *bld, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr) +{ + struct lp_build_context *coeff_bld = &bld->coeff_bld; + struct lp_build_context *setup_bld = &bld->setup_bld; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef pixoffx, pixoffy; + unsigned attrib; + unsigned chan; + unsigned i; + + pixoffx = coeff_bld->undef; + pixoffy = coeff_bld->undef; + for (i = 0; i < coeff_bld->type.length; i++) { + LLVMValueRef nr = lp_build_const_int32(gallivm, i); + LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]); + LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]); + pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); + pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); + } - /* - * dadq2 = 2 * dq - */ - dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); + for (attrib = 0; attrib < bld->num_attribs; ++attrib) { + const unsigned mask = bld->mask[attrib]; + const unsigned interp = bld->interp[attrib]; + LLVMValueRef index = lp_build_const_int32(gallivm, + attrib * TGSI_NUM_CHANNELS); + LLVMValueRef ptr; + LLVMValueRef dadxaos 
= setup_bld->zero; + LLVMValueRef dadyaos = setup_bld->zero; + LLVMValueRef a0aos = setup_bld->zero; + + /* always fetch all 4 values for performance/simplicity */ + switch (interp) { + case LP_INTERP_PERSPECTIVE: + /* fall-through */ + + case LP_INTERP_LINEAR: + ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadxaos = LLVMBuildLoad(builder, ptr, ""); + + ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + dadyaos = LLVMBuildLoad(builder, ptr, ""); + + attrib_name(dadxaos, attrib, 0, ".dadxaos"); + attrib_name(dadyaos, attrib, 0, ".dadyaos"); + /* fall-through */ + + case LP_INTERP_CONSTANT: + case LP_INTERP_FACING: + ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); + ptr = LLVMBuildBitCast(builder, ptr, + LLVMPointerType(setup_bld->vec_type, 0), ""); + a0aos = LLVMBuildLoad(builder, ptr, ""); + attrib_name(a0aos, attrib, 0, ".a0aos"); + break; + + case LP_INTERP_POSITION: + /* Nothing to do as the position coeffs are already setup in slot 0 */ + continue; + + default: + assert(0); + break; + } - /* - * a = a0 + (x * dadx + y * dady) - */ + /* + * a = a0 + (x * dadx + y * dady) + * a0aos is the attrib value at top left corner of stamp + */ + if (interp != LP_INTERP_CONSTANT && + interp != LP_INTERP_FACING) { + LLVMValueRef axaos, ayaos; + axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x), + dadxaos, ""); + ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y), + dadyaos, ""); + a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, ""); + a0aos = LLVMBuildFAdd(builder, a0aos, axaos, ""); + } + + /* + * dadq = {0, dadx, dady, dadx + dady} + * for two quads (side by side) this is: + * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady} + */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + /* this generates a CRAPLOAD of 
shuffles... */ + if (mask & (1 << chan)) { + LLVMValueRef dadx, dady; + LLVMValueRef dadq, dadq2; + LLVMValueRef a; + LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan); if (attrib == 0 && chan == 0) { - a = bld->x; + a = lp_build_broadcast_scalar(coeff_bld, bld->x); + dadx = coeff_bld->one; + dady = coeff_bld->zero; } else if (attrib == 0 && chan == 1) { - a = bld->y; + a = lp_build_broadcast_scalar(coeff_bld, bld->y); + dady = coeff_bld->one; + dadx = coeff_bld->zero; } else { - a = a0; - if (interp != LP_INTERP_CONSTANT && - interp != LP_INTERP_FACING) { - LLVMValueRef ax, ay, axy; - ax = LLVMBuildFMul(builder, bld->x, dadx, ""); - ay = LLVMBuildFMul(builder, bld->y, dady, ""); - axy = LLVMBuildFAdd(builder, ax, ay, ""); - a = LLVMBuildFAdd(builder, a, axy, ""); - } - } + dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, dadxaos, chan_index); + dady = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, dadyaos, chan_index); - /* - * a = {a, a, a, a} - */ + /* + * a = {a, a, a, a} + */ + a = lp_build_extract_broadcast(gallivm, setup_bld->type, + coeff_bld->type, a0aos, chan_index); + } - a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a); + dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); + dady = LLVMBuildFMul(builder, dady, pixoffy, ""); + dadq = LLVMBuildFAdd(builder, dadx, dady, ""); /* - * Compute the attrib values on the upper-left corner of each quad. + * Compute the attrib values on the upper-left corner of each + * group of quads. + * Note that if we process 2 quads at once this doesn't + * really correspond exactly to what we want. + * We need to access elem 0 and 2 respectively later if we process + * 2 quads at once. 
*/ if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { + dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); a = LLVMBuildFAdd(builder, a, dadq2, ""); } @@ -249,6 +477,12 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * a *= 1 / w */ + /* + * XXX since we're only going to access elements 0,2 out of 8 + * if we have 8-wide vectors we should do the division only 4-wide. + * a is really a 2-elements in a 4-wide vector disguised as 8-wide + * in this case. + */ if (interp == LP_INTERP_PERSPECTIVE) { LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); @@ -279,18 +513,18 @@ coeffs_init(struct lp_build_interp_soa_context *bld, static void attribs_update(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, - int quad_index, + int quad_start_index, int start, int end) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *coeff_bld = &bld->coeff_bld; - LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index); + LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index); LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; - assert(quad_index < 4); + assert(quad_start_index < 4); for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; @@ -412,6 +646,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y0) { struct lp_type coeff_type; + struct lp_type setup_type; unsigned attrib; unsigned chan; @@ -421,19 +656,26 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, coeff_type.floating = TRUE; coeff_type.sign = TRUE; coeff_type.width = 32; - coeff_type.length = TGSI_QUAD_SIZE; + coeff_type.length = type.length; + + memset(&setup_type, 0, sizeof setup_type); + setup_type.floating = TRUE; + setup_type.sign = TRUE; + setup_type.width = 32; + setup_type.length = TGSI_NUM_CHANNELS; + /* XXX: we don't support interpolating into any other types */ assert(memcmp(&coeff_type, &type, sizeof 
coeff_type) == 0); lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type); + lp_build_context_init(&bld->setup_bld, gallivm, setup_type); /* For convenience */ bld->pos = bld->attribs[0]; bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1]; /* Position */ - bld->num_attribs = 1; bld->mask[0] = TGSI_WRITEMASK_XYZW; bld->interp[0] = LP_INTERP_LINEAR; @@ -453,7 +695,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); - coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + if (coeff_type.length > 4) { + coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr); + } + else { + coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); + } } @@ -463,20 +710,30 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, void lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, - int quad_index) + int quad_start_index) { - assert(quad_index < 4); + assert(quad_start_index < 4); - attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs); + if (bld->coeff_bld.type.length > 4) { + attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } + else { + attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } } void lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, - int quad_index) + int quad_start_index) { - assert(quad_index < 4); + assert(quad_start_index < 4); - attribs_update(bld, gallivm, quad_index, 0, 1); + if (bld->coeff_bld.type.length > 4) { + attribs_update_simple(bld, gallivm, quad_start_index, 0, 1); + } + else { + attribs_update(bld, gallivm, quad_start_index, 0, 1); + } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 6970a9b8c2c..f293b582318 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -79,6 +79,7 @@ struct lp_build_interp_soa_context 
{ /* TGSI_QUAD_SIZE x float */ struct lp_build_context coeff_bld; + struct lp_build_context setup_bld; unsigned num_attribs; unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ @@ -87,8 +88,11 @@ struct lp_build_interp_soa_context LLVMValueRef x; LLVMValueRef y; - LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef a[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef a0aos[1 + PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef dadxaos[1 + PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef dadyaos[1 + PIPE_MAX_SHADER_INPUTS]; LLVMValueRef oow; @@ -118,12 +122,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, void lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, - int quad_index); + int quad_start_index); void lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, - int quad_index); + int quad__start_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 9e4c7d6734e..07cea9158c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -51,42 +51,6 @@ unsigned llvmpipe_variant_count; -/** - * This function is called by the gallivm "garbage collector" when - * the LLVM global data structures are freed. We must free all LLVM-related - * data. Specifically, all JIT'd shader variants. 
- */ -static void -garbage_collect_callback(void *cb_data) -{ - struct llvmpipe_context *lp = (struct llvmpipe_context *) cb_data; - struct lp_fs_variant_list_item *li; - - /* Free all the context's shader variants */ - li = first_elem(&lp->fs_variants_list); - while (!at_end(&lp->fs_variants_list, li)) { - struct lp_fs_variant_list_item *next = next_elem(li); - llvmpipe_remove_shader_variant(lp, li->base); - li = next; - } - - /* Free all the context's primitive setup variants */ - lp_delete_setup_variants(lp); - - /* release references to setup variants, shaders */ - lp_setup_set_setup_variant(lp->setup, NULL); - lp_setup_set_fs_variant(lp->setup, NULL); - lp_setup_reset(lp->setup); - - /* This type will be recreated upon demand */ - lp->jit_context_ptr_type = NULL; - - /* mark all state as dirty to ensure new shaders are jit'd, etc. */ - lp->dirty = ~0; -} - - - static void llvmpipe_destroy( struct pipe_context *pipe ) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); @@ -94,9 +58,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) lp_print_counters(); - gallivm_remove_garbage_collector_callback(garbage_collect_callback, - llvmpipe); - /* This will also destroy llvmpipe->setup: */ if (llvmpipe->draw) @@ -128,8 +89,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) lp_delete_setup_variants(llvmpipe); - gallivm_destroy(llvmpipe->gallivm); - align_free( llvmpipe ); } @@ -195,12 +154,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); llvmpipe_init_surface_functions(llvmpipe); - llvmpipe->gallivm = gallivm_create(); - /* * Create drawing context and plug our rendering stage into it. 
*/ - llvmpipe->draw = draw_create_gallivm(&llvmpipe->pipe, llvmpipe->gallivm); + llvmpipe->draw = draw_create(&llvmpipe->pipe); if (!llvmpipe->draw) goto fail; @@ -226,9 +183,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) lp_reset_counters(); - gallivm_register_garbage_collector_callback(garbage_collect_callback, - llvmpipe); - return &llvmpipe->pipe; fail: diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index d4750705b43..d0220e188cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -131,10 +131,6 @@ struct llvmpipe_context { unsigned nr_fs_variants; unsigned nr_fs_instrs; - /** JIT code generation */ - struct gallivm_state *gallivm; - LLVMTypeRef jit_context_ptr_type; - struct lp_setup_variant_list_item setup_variants_list; unsigned nr_setup_variants; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index 42430550ea6..964b792b739 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -54,13 +54,6 @@ llvmpipe_flush( struct pipe_context *pipe, /* ask the setup module to flush */ lp_setup_flush(llvmpipe->setup, fence, reason); - - if (llvmpipe_variant_count > 1000) { - /* time to do a garbage collection */ - gallivm_garbage_collect(llvmpipe->gallivm); - llvmpipe_variant_count = 0; - } - /* Enable to dump BMPs of the color/depth buffers each frame */ if (0) { static unsigned frame_no = 1; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index eb1db84e4b8..7a85eab41a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -41,7 +41,7 @@ static void -lp_jit_create_types(struct llvmpipe_context *lp) +lp_jit_create_types(struct lp_fragment_shader_variant *lp) { struct gallivm_state *gallivm = lp->gallivm; LLVMContextRef lc = gallivm->context; @@ -183,11 +183,9 @@ 
lp_jit_screen_init(struct llvmpipe_screen *screen) } -LLVMTypeRef -lp_jit_get_context_type(struct llvmpipe_context *lp) +void +lp_jit_init_types(struct lp_fragment_shader_variant *lp) { if (!lp->jit_context_ptr_type) lp_jit_create_types(lp); - - return lp->jit_context_ptr_type; } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 04e8dd5267b..584d2c8fd81 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -42,6 +42,7 @@ #include "lp_texture.h" +struct lp_fragment_shader_variant; struct llvmpipe_screen; @@ -164,8 +165,8 @@ void lp_jit_screen_init(struct llvmpipe_screen *screen); -LLVMTypeRef -lp_jit_get_context_type(struct llvmpipe_context *lp); +void +lp_jit_init_types(struct lp_fragment_shader_variant *lp); #endif /* LP_JIT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c index 0f55d4a80ae..85f73e54ac4 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.c +++ b/src/gallium/drivers/llvmpipe/lp_memory.c @@ -36,10 +36,12 @@ * number of threads or using a smaller tilesize when multiple * colorbuffers are bound. */ -PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; +PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; /* A single dummy tile used in a couple of out-of-memory situations. 
*/ -PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; +PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h index f7418f5e087..5552c2908e1 100644 --- a/src/gallium/drivers/llvmpipe/lp_memory.h +++ b/src/gallium/drivers/llvmpipe/lp_memory.h @@ -32,9 +32,12 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "lp_limits.h" +#include "gallivm/lp_bld_type.h" -extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; +extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4]; -extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; +extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; #endif /* LP_MEMORY_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 09af0274d7a..d743d7689ae 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,6 +42,7 @@ #include "lp_tile_soa.h" #include "gallivm/lp_bld_debug.h" #include "lp_scene.h" +#include "lp_tex_sample.h" #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 03d15f6e2b0..54f45357fdc 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -97,56 +97,56 @@ #include "lp_state_fs.h" -#include <llvm-c/Analysis.h> -#include <llvm-c/BitWriter.h> - - /** Fragment shader number (for debugging) */ static unsigned fs_no = 0; /** - * Expand the relevent bits of mask_input to a 4-dword mask for the - * four pixels in a 2x2 quad. This will set the four elements of the + * Expand the relevant bits of mask_input to a n*4-dword mask for the + * n*four pixels in n 2x2 quads. 
This will set the n*four elements of the * quad mask vector to 0 or ~0. + * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid + * quad arguments with fs length 8. * - * \param quad which quad of the quad group to test, in [0,3] + * \param first_quad which quad(s) of the quad group to test, in [0,3] * \param mask_input bitwise mask for the whole 4x4 stamp */ static LLVMValueRef generate_quad_mask(struct gallivm_state *gallivm, struct lp_type fs_type, - unsigned quad, + unsigned first_quad, LLVMValueRef mask_input) /* int32 */ { LLVMBuilderRef builder = gallivm->builder; struct lp_type mask_type; LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMValueRef bits[4]; + LLVMValueRef bits[16]; LLVMValueRef mask; - int shift; + int shift, i; /* * XXX: We'll need a different path for 16 x u8 */ assert(fs_type.width == 32); - assert(fs_type.length == 4); + assert(fs_type.length <= Elements(bits)); mask_type = lp_int_type(fs_type); /* * mask_input >>= (quad * 4) */ - switch (quad) { + switch (first_quad) { case 0: shift = 0; break; case 1: + assert(fs_type.length == 4); shift = 2; break; case 2: shift = 8; break; case 3: + assert(fs_type.length == 4); shift = 10; break; default: @@ -166,12 +166,14 @@ generate_quad_mask(struct gallivm_state *gallivm, lp_build_vec_type(gallivm, mask_type), mask_input); - bits[0] = LLVMConstInt(i32t, 1 << 0, 0); - bits[1] = LLVMConstInt(i32t, 1 << 1, 0); - bits[2] = LLVMConstInt(i32t, 1 << 4, 0); - bits[3] = LLVMConstInt(i32t, 1 << 5, 0); - - mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), ""); + for (i = 0; i < fs_type.length / 4; i++) { + unsigned j = 2 * (i % 2) + (i / 2) * 8; + bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0); + bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0); + bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0); + bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0); + } + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), ""); /* * mask = mask != 0 ? 
~0 : 0 @@ -300,7 +302,7 @@ generate_fs(struct gallivm_state *gallivm, /* do triangle edge testing */ if (partial_mask) { *pmask = generate_quad_mask(gallivm, type, - i, mask_input); + i*type.length/4, mask_input); } else { *pmask = lp_build_const_int_vec(gallivm, type, ~0); @@ -312,7 +314,7 @@ generate_fs(struct gallivm_state *gallivm, if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); - lp_build_interp_soa_update_pos(interp, gallivm, i); + lp_build_interp_soa_update_pos(interp, gallivm, i*type.length/4); z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { @@ -333,7 +335,7 @@ generate_fs(struct gallivm_state *gallivm, } } - lp_build_interp_soa_update_inputs(interp, gallivm, i); + lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4); /* Build the actual shader */ lp_build_tgsi_soa(gallivm, tokens, type, &mask, @@ -515,7 +517,7 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { - struct gallivm_state *gallivm = lp->gallivm; + struct gallivm_state *gallivm = variant->gallivm; const struct lp_fragment_shader_variant_key *key = &variant->key; struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[256]; @@ -541,8 +543,8 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuilderRef builder; struct lp_build_sampler_soa *sampler; struct lp_build_interp_soa_context interp; - LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][LP_MAX_VECTOR_LENGTH]; + LLVMValueRef fs_mask[16 / 4]; + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; @@ -553,6 +555,8 @@ generate_fragment(struct llvmpipe_context *lp, unsigned cbuf; boolean cbuf0_write_all; + assert(lp_native_vector_width / 32 >= 4); + /* Adjust color input interpolation according to flatshade state: */ memcpy(inputs, shader->inputs, 
shader->info.base.num_inputs * sizeof inputs[0]); @@ -579,12 +583,12 @@ generate_fragment(struct llvmpipe_context *lp, * characteristics. */ memset(&fs_type, 0, sizeof fs_type); - fs_type.floating = TRUE; /* floating point values */ - fs_type.sign = TRUE; /* values are signed */ - fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ - fs_type.width = 32; /* 32-bit float */ - fs_type.length = 4; /* 4 elements per vector */ - num_fs = 4; /* number of quads per block */ + fs_type.floating = TRUE; /* floating point values */ + fs_type.sign = TRUE; /* values are signed */ + fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + fs_type.width = 32; /* 32-bit float */ + fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */ + num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ memset(&blend_type, 0, sizeof blend_type); blend_type.floating = FALSE; /* values are integers */ @@ -605,7 +609,7 @@ generate_fragment(struct llvmpipe_context *lp, util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", shader->no, variant->no, partial_mask ? 
"partial" : "whole"); - arg_types[0] = lp_jit_get_context_type(lp); /* context */ + arg_types[0] = variant->jit_context_ptr_type; /* context */ arg_types[1] = int32_type; /* x */ arg_types[2] = int32_type; /* y */ arg_types[3] = int32_type; /* facing */ @@ -738,20 +742,20 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); } - lp_build_conv(gallivm, fs_type, blend_type, + lp_build_conv(gallivm, fs_type, blend_type, fs_color_vals, num_fs, - &blend_in_color[chan], 1); + &blend_in_color[chan], 1); - lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } if (partial_mask || !variant->opaque) { - lp_build_conv_mask(lp->gallivm, fs_type, blend_type, + lp_build_conv_mask(variant->gallivm, fs_type, blend_type, fs_mask, num_fs, &blend_mask, 1); } else { - blend_mask = lp_build_const_int_vec(lp->gallivm, blend_type, ~0); + blend_mask = lp_build_const_int_vec(variant->gallivm, blend_type, ~0); } color_ptr = LLVMBuildLoad(builder, @@ -772,7 +776,7 @@ generate_fragment(struct llvmpipe_context *lp, !key->alpha.enabled && !shader->info.base.uses_kill); - generate_blend(lp->gallivm, + generate_blend(variant->gallivm, &key->blend, rt, builder, @@ -787,43 +791,9 @@ generate_fragment(struct llvmpipe_context *lp, LLVMBuildRetVoid(builder); - /* Verify the LLVM IR. 
If invalid, dump and abort */ -#ifdef DEBUG - if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) { - if (1) - lp_debug_dump_value(function); - abort(); - } -#endif - - /* Apply optimizations to LLVM IR */ - LLVMRunFunctionPassManager(gallivm->passmgr, function); - - if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { - /* Print the LLVM IR to stderr */ - lp_debug_dump_value(function); - debug_printf("\n"); - } - - /* Dump byte code to a file */ - if (0) { - LLVMWriteBitcodeToFile(gallivm->module, "llvmpipe.bc"); - } + gallivm_verify_function(gallivm, function); variant->nr_instrs += lp_build_count_instructions(function); - /* - * Translate the LLVM IR into machine code. - */ - { - void *f = LLVMGetPointerToGlobal(gallivm->engine, function); - - variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - - if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { - lp_disassemble(f); - } - lp_func_delete_body(function); - } } @@ -937,6 +907,12 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; + variant->gallivm = gallivm_create(); + if (!variant->gallivm) { + FREE(variant); + return NULL; + } + variant->shader = shader; variant->list_item_global.base = variant; variant->list_item_local.base = variant; @@ -968,12 +944,35 @@ generate_variant(struct llvmpipe_context *lp, lp_debug_fs_variant(variant); } - generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + lp_jit_init_types(variant); + + if (variant->jit_function[RAST_EDGE_TEST] == NULL) + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + + if (variant->jit_function[RAST_WHOLE] == NULL) { + if (variant->opaque) { + /* Specialized shader, which doesn't need to read the color buffer. 
*/ + generate_fragment(lp, shader, variant, RAST_WHOLE); + } + } + + /* + * Compile everything + */ + + gallivm_compile_module(variant->gallivm); + + if (variant->function[RAST_EDGE_TEST]) { + variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func) + gallivm_jit_function(variant->gallivm, + variant->function[RAST_EDGE_TEST]); + } - if (variant->opaque) { - /* Specialized shader, which doesn't need to read the color buffer. */ - generate_fragment(lp, shader, variant, RAST_WHOLE); - } else { + if (variant->function[RAST_WHOLE]) { + variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func) + gallivm_jit_function(variant->gallivm, + variant->function[RAST_WHOLE]); + } else if (!variant->jit_function[RAST_WHOLE]) { variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } @@ -1116,13 +1115,14 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, /* free all the variant's JIT'd functions */ for (i = 0; i < Elements(variant->function); i++) { if (variant->function[i]) { - if (variant->jit_function[i]) - LLVMFreeMachineCodeForFunction(lp->gallivm->engine, - variant->function[i]); - LLVMDeleteFunction(variant->function[i]); + gallivm_free_function(variant->gallivm, + variant->function[i], + variant->jit_function[i]); } } + gallivm_destroy(variant->gallivm); + /* remove from shader's list */ remove_from_list(&variant->list_item_local); variant->shader->variants_cached--; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 273d241d8fc..306f5f9669a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -84,6 +84,12 @@ struct lp_fragment_shader_variant boolean opaque; + struct gallivm_state *gallivm; + + LLVMTypeRef jit_context_ptr_type; + LLVMTypeRef jit_thread_data_ptr_type; + LLVMTypeRef jit_linear_context_ptr_type; + LLVMValueRef function[2]; lp_jit_frag_func jit_function[2]; diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c 
b/src/gallium/drivers/llvmpipe/lp_state_setup.c index 299c1ef85dc..1d5e50be9b7 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -38,7 +38,6 @@ #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_type.h" -#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */ #include "lp_perf.h" #include "lp_debug.h" @@ -77,12 +76,6 @@ struct lp_setup_args LLVMValueRef dy01_ooa; LLVMValueRef dx20_ooa; LLVMValueRef dx01_ooa; - - /* Temporary, per-attribute: - */ - LLVMValueRef v0a; - LLVMValueRef v1a; - LLVMValueRef v2a; }; @@ -146,7 +139,7 @@ store_coef(struct gallivm_state *gallivm, { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef idx = lp_build_const_int32(gallivm, slot); - + LLVMBuildStore(builder, a0, LLVMBuildGEP(builder, args->a0, &idx, 1, "")); @@ -210,27 +203,13 @@ vert_attrib(struct gallivm_state *gallivm, return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); } -static LLVMValueRef -vert_clamp(LLVMBuilderRef b, - LLVMValueRef x, - LLVMValueRef min, - LLVMValueRef max) -{ - LLVMValueRef min_result = LLVMBuildFCmp(b, LLVMRealUGT, min, x, ""); - LLVMValueRef max_result = LLVMBuildFCmp(b, LLVMRealUGT, x, max, ""); - LLVMValueRef clamp_value; - - clamp_value = LLVMBuildSelect(b, min_result, min, x, ""); - clamp_value = LLVMBuildSelect(b, max_result, max, x, ""); - - return clamp_value; -} static void lp_twoside(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key, - int bcolor_slot) + int bcolor_slot, + LLVMValueRef attribv[3]) { LLVMBuilderRef b = gallivm->builder; LLVMValueRef a0_back, a1_back, a2_back; @@ -248,67 +227,66 @@ lp_twoside(struct gallivm_state *gallivm, * Prefer select to if so we don't have to worry about phis or * allocas. 
*/ - args->v0a = LLVMBuildSelect(b, front_facing, a0_back, args->v0a, ""); - args->v1a = LLVMBuildSelect(b, front_facing, a1_back, args->v1a, ""); - args->v2a = LLVMBuildSelect(b, front_facing, a2_back, args->v2a, ""); + attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); + attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); + attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); } static void lp_do_offset_tri(struct gallivm_state *gallivm, struct lp_setup_args *args, - const struct lp_setup_variant_key *key) + const struct lp_setup_variant_key *key, + LLVMValueRef inv_det, + LLVMValueRef dxyz01, + LLVMValueRef dxyz20, + LLVMValueRef attribv[3]) { LLVMBuilderRef b = gallivm->builder; struct lp_build_context bld; LLVMValueRef zoffset, mult; LLVMValueRef z0_new, z1_new, z2_new; - LLVMValueRef dzdx0, dzdx, dzdy0, dzdy; - LLVMValueRef max, max_value; - - LLVMValueRef one = lp_build_const_float(gallivm, 1.0); - LLVMValueRef zero = lp_build_const_float(gallivm, 0.0); - LLVMValueRef two = lp_build_const_int32(gallivm, 2); - - /* edge vectors: e = v0 - v2, f = v1 - v2 */ - LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x"); - LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x"); - LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x"); - LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y"); - LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y"); - LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y"); - LLVMValueRef v0_z = vert_attrib(gallivm, args->v0, 0, 2, "v0_z"); - LLVMValueRef v1_z = vert_attrib(gallivm, args->v1, 0, 2, "v1_z"); - LLVMValueRef v2_z = vert_attrib(gallivm, args->v2, 0, 2, "v2_z"); - - /* edge vectors: e = v0 - v2, f = v1 - v2 */ - LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02"); - LLVMValueRef dy02 = LLVMBuildFSub(b, v0_y, v2_y, "dy02"); - LLVMValueRef dz02 = LLVMBuildFSub(b, v0_z, v2_z, "dz02"); - LLVMValueRef dx12 
= LLVMBuildFSub(b, v1_x, v2_x, "dx12"); - LLVMValueRef dy12 = LLVMBuildFSub(b, v1_y, v2_y, "dy12"); - LLVMValueRef dz12 = LLVMBuildFSub(b, v1_z, v2_z, "dz12"); - - /* det = cross(e,f).z */ - LLVMValueRef dx02_dy12 = LLVMBuildFMul(b, dx02, dy12, "dx02_dy12"); - LLVMValueRef dy02_dx12 = LLVMBuildFMul(b, dy02, dx12, "dy02_dx12"); - LLVMValueRef det = LLVMBuildFSub(b, dx02_dy12, dy02_dx12, "det"); - LLVMValueRef inv_det = LLVMBuildFDiv(b, one, det, "inv_det"); - - /* (res1,res2) = cross(e,f).xy */ - LLVMValueRef dy02_dz12 = LLVMBuildFMul(b, dy02, dz12, "dy02_dz12"); - LLVMValueRef dz02_dy12 = LLVMBuildFMul(b, dz02, dy12, "dz02_dy12"); - LLVMValueRef dz02_dx12 = LLVMBuildFMul(b, dz02, dx12, "dz02_dx12"); - LLVMValueRef dx02_dz12 = LLVMBuildFMul(b, dx02, dz12, "dx02_dz12"); - LLVMValueRef res1 = LLVMBuildFSub(b, dy02_dz12, dz02_dy12, "res1"); - LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2"); + LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; + LLVMValueRef z0z1, z0z1z2; + LLVMValueRef max, max_value, res12; + LLVMValueRef shuffles[4]; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); + LLVMValueRef threei = lp_build_const_int32(gallivm, 3); + + /* (res12) = cross(e,f).xy */ + shuffles[0] = twoi; + shuffles[1] = zeroi; + shuffles[2] = onei; + shuffles[3] = twoi; + dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); + + shuffles[0] = onei; + shuffles[1] = twoi; + shuffles[2] = twoi; + shuffles[3] = zeroi; + dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); + + dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); + + shuffles[0] = twoi; + shuffles[1] = threei; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + 
dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, + LLVMConstVector(shuffles, 4), ""); + + res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ - lp_build_context_init(&bld, gallivm, lp_type_float(32)); - dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx"); - dzdx = lp_build_abs(&bld, dzdx0); - dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy"); - dzdy = lp_build_abs(&bld, dzdy0); + lp_build_context_init(&bld, gallivm, lp_type_float_vec(32, 128)); + dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); + dzdxdzdy = lp_build_abs(&bld, dzdxdzdy); + + dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); + dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */ max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); @@ -317,45 +295,56 @@ lp_do_offset_tri(struct gallivm_state *gallivm, mult = LLVMBuildFMul(b, max_value, lp_build_const_float(gallivm, key->scale), ""); zoffset = LLVMBuildFAdd(b, lp_build_const_float(gallivm, key->units), mult, "zoffset"); + /* yuck */ + shuffles[0] = twoi; + shuffles[1] = lp_build_const_int32(gallivm, 6); + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), ""); + shuffles[0] = zeroi; + shuffles[1] = onei; + shuffles[2] = lp_build_const_int32(gallivm, 6); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), ""); + zoffset = vec4f_from_scalar(gallivm, zoffset, ""); + /* clamp and do offset */ - z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one); - z1_new = vert_clamp(b, LLVMBuildFAdd(b, v1_z, zoffset, ""), zero, one); - z2_new = vert_clamp(b, LLVMBuildFAdd(b, v2_z, zoffset, ""), zero, one); + z0z1z2 = lp_build_clamp(&bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld.zero, 
bld.one); /* insert into args->a0.z, a1.z, a2.z: - */ - args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, two, ""); - args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, two, ""); - args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, two, ""); + */ + z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, ""); + z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, ""); + z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, ""); + attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, ""); + attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, ""); + attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, ""); } static void load_attribute(struct gallivm_state *gallivm, struct lp_setup_args *args, const struct lp_setup_variant_key *key, - unsigned vert_attr) + unsigned vert_attr, + LLVMValueRef attribv[3]) { LLVMBuilderRef b = gallivm->builder; LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); /* Load the vertex data */ - args->v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); - args->v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); - args->v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); - /* Potentially modify it according to twoside, offset, etc: + /* Potentially modify it according to twoside, etc: */ - if (vert_attr == 0 && (key->scale != 0.0f || key->units != 0.0f)) { - lp_do_offset_tri(gallivm, args, key); - } - if (key->twoside) { if (vert_attr == key->color_slot && key->bcolor_slot >= 0) - lp_twoside(gallivm, args, key, key->bcolor_slot); + lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) - lp_twoside(gallivm, args, key, key->bspec_slot); + 
lp_twoside(gallivm, args, key, key->bspec_slot, attribv); } } @@ -375,8 +364,6 @@ emit_coef4( struct gallivm_state *gallivm, LLVMValueRef x0_center = args->x0_center; LLVMValueRef y0_center = args->y0_center; - /* XXX: using fsub, fmul on vector types -- does this work?? - */ LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); @@ -406,14 +393,15 @@ emit_coef4( struct gallivm_state *gallivm, static void emit_linear_coef( struct gallivm_state *gallivm, struct lp_setup_args *args, - unsigned slot) + unsigned slot, + LLVMValueRef attribv[3]) { /* nothing to do anymore */ emit_coef4(gallivm, args, slot, - args->v0a, - args->v1a, - args->v2a); + attribv[0], + attribv[1], + attribv[2]); } @@ -426,9 +414,10 @@ emit_linear_coef( struct gallivm_state *gallivm, * divide the interpolated value by the interpolated W at that fragment. */ static void -emit_perspective_coef( struct gallivm_state *gallivm, - struct lp_setup_args *args, - unsigned slot) +apply_perspective_corr( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef attribv[3]) { LLVMBuilderRef b = gallivm->builder; @@ -438,20 +427,19 @@ emit_perspective_coef( struct gallivm_state *gallivm, LLVMValueRef v1_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v1, 0, 3, ""), "v1_oow"); LLVMValueRef v2_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v2, 0, 3, ""), "v2_oow"); - LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, args->v0a, v0_oow, "v0_oow_v0a"); - LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, args->v1a, v1_oow, "v1_oow_v1a"); - LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, args->v2a, v2_oow, "v2_oow_v2a"); - - emit_coef4(gallivm, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); + attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); + attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); + attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); } static void emit_position_coef( 
struct gallivm_state *gallivm, struct lp_setup_args *args, - int slot ) + int slot, + LLVMValueRef attribv[3]) { - emit_linear_coef(gallivm, args, slot); + emit_linear_coef(gallivm, args, slot, attribv); } @@ -464,7 +452,9 @@ emit_position_coef( struct gallivm_state *gallivm, static void emit_apply_cyl_wrap(struct gallivm_state *gallivm, struct lp_setup_args *args, - uint cyl_wrap) + uint cyl_wrap, + LLVMValueRef attribv[3]) + { LLVMBuilderRef builder = gallivm->builder; struct lp_type type = lp_float32_vec4_type(); @@ -489,43 +479,43 @@ emit_apply_cyl_wrap(struct gallivm_state *gallivm, one = LLVMBuildAnd(builder, one, cyl_mask, ""); /* Edge v0 -> v1 */ - delta = LLVMBuildFSub(builder, args->v1a, args->v0a, ""); + delta = LLVMBuildFSub(builder, attribv[1], attribv[0], ""); - offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v0a = LLVMBuildFAdd(builder, args->v0a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v1a = LLVMBuildFAdd(builder, args->v1a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); /* Edge v1 -> v2 */ - delta = LLVMBuildFSub(builder, args->v2a, args->v1a, ""); + delta = LLVMBuildFSub(builder, attribv[2], attribv[1], ""); - offset = lp_build_compare(gallivm, type, 
PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v1a = LLVMBuildFAdd(builder, args->v1a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v2a = LLVMBuildFAdd(builder, args->v2a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); /* Edge v2 -> v0 */ - delta = LLVMBuildFSub(builder, args->v0a, args->v2a, ""); + delta = LLVMBuildFSub(builder, attribv[0], attribv[2], ""); - offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v2a = LLVMBuildFAdd(builder, args->v2a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); - offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); - offset = LLVMBuildAnd(builder, offset, one, ""); - offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); - args->v0a = LLVMBuildFAdd(builder, args->v0a, offset, ""); + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = 
LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); } @@ -534,43 +524,38 @@ emit_apply_cyl_wrap(struct gallivm_state *gallivm, */ static void emit_tri_coef( struct gallivm_state *gallivm, - const struct lp_setup_variant_key *key, - struct lp_setup_args *args ) + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) { unsigned slot; - /* The internal position input is in slot zero: - */ - load_attribute(gallivm, args, key, 0); - emit_position_coef(gallivm, args, 0); + LLVMValueRef attribs[3]; - /* setup interpolation for all the remaining attributes: + /* setup interpolation for all the remaining attributes: */ for (slot = 0; slot < key->num_inputs; slot++) { - - if (key->inputs[slot].interp == LP_INTERP_CONSTANT || - key->inputs[slot].interp == LP_INTERP_LINEAR || - key->inputs[slot].interp == LP_INTERP_PERSPECTIVE) - load_attribute(gallivm, args, key, key->inputs[slot].src_index); - switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (key->flatshade_first) { - emit_constant_coef4(gallivm, args, slot+1, args->v0a); - } - else { - emit_constant_coef4(gallivm, args, slot+1, args->v2a); - } - break; + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + if (key->flatshade_first) { + emit_constant_coef4(gallivm, args, slot+1, attribs[0]); + } + else { + emit_constant_coef4(gallivm, args, slot+1, attribs[2]); + } + break; case LP_INTERP_LINEAR: - emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap); - emit_linear_coef(gallivm, args, slot+1); + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); break; case LP_INTERP_PERSPECTIVE: - emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap); - emit_perspective_coef(gallivm, args, slot+1); + 
load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + apply_perspective_corr(gallivm, args, slot+1, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); break; case LP_INTERP_POSITION: @@ -591,62 +576,6 @@ emit_tri_coef( struct gallivm_state *gallivm, } -/* XXX: This is generic code, share with fs/vs codegen: - */ -static lp_jit_setup_triangle -finalize_function(struct gallivm_state *gallivm, - LLVMBuilderRef builder, - LLVMValueRef function) -{ - void *f; - - /* Verify the LLVM IR. If invalid, dump and abort */ -#ifdef DEBUG - if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { - if (1) - lp_debug_dump_value(function); - abort(); - } -#endif - - /* Apply optimizations to LLVM IR */ - LLVMRunFunctionPassManager(gallivm->passmgr, function); - - if (gallivm_debug & GALLIVM_DEBUG_IR) - { - /* Print the LLVM IR to stderr */ - lp_debug_dump_value(function); - debug_printf("\n"); - } - - /* - * Translate the LLVM IR into machine code. - */ - f = LLVMGetPointerToGlobal(gallivm->engine, function); - - if (gallivm_debug & GALLIVM_DEBUG_ASM) - { - lp_disassemble(f); - } - - lp_func_delete_body(function); - - return (lp_jit_setup_triangle) pointer_to_func(f); -} - -/* XXX: Generic code: - */ -static void -lp_emit_emms(struct gallivm_state *gallivm) -{ -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. 
*/ - lp_build_intrinsic(gallivm->builder, "llvm.x86.mmx.emms", - LLVMVoidTypeInContext(gallivm->context), NULL, 0); -#endif -} - - /* XXX: generic code: */ static void @@ -664,49 +593,70 @@ set_noalias(LLVMBuilderRef builder, static void init_args(struct gallivm_state *gallivm, - struct lp_setup_args *args, - const struct lp_setup_variant *variant) + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) { LLVMBuilderRef b = gallivm->builder; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; + LLVMValueRef e, f, ef, ooa; + LLVMValueRef shuffles[4]; + LLVMValueRef attr_pos[3]; + struct lp_type typef4 = lp_type_float_vec(32, 128); - LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x"); - LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y"); + /* The internal position input is in slot zero: + */ + load_attribute(gallivm, args, key, 0, attr_pos); - LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x"); - LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y"); + pixel_center = lp_build_const_vec(gallivm, typef4, + key->pixel_center_half ? 0.5 : 0.0); - LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x"); - LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y"); + /* + * xy are first two elems in v0a/v1a/v2a but just use vec4 arit + * also offset_tri uses actually xyz in them + */ + xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); - LLVMValueRef pixel_center = lp_build_const_float(gallivm, - variant->key.pixel_center_half ? 
0.5 : 0); + dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); + dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); - LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); - LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); - - LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); - LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); - LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); - LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + shuffles[0] = onei; + shuffles[1] = zeroi; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + + dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, LLVMConstVector(shuffles, 4), ""); + + ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); + e = LLVMBuildExtractElement(b, ef, zeroi, ""); + f = LLVMBuildExtractElement(b, ef, onei, ""); - LLVMValueRef one = lp_build_const_float(gallivm, 1.0); - LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); - LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); - LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); - LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); - LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); - LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); - LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + ooa = vec4f_from_scalar(gallivm, ooa, ""); + + /* tri offset calc shares a lot of arithmetic, do it here */ + if (key->scale != 0.0f || key->units != 0.0f) { + lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); + } - args->dy20_ooa = vec4f_from_scalar(gallivm, dy20_ooa, "dy20_ooa_4f"); - args->dy01_ooa = vec4f_from_scalar(gallivm, dy01_ooa, "dy01_ooa_4f"); + dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); + dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); - args->dx20_ooa = vec4f_from_scalar(gallivm, dx20_ooa, 
"dx20_ooa_4f"); - args->dx01_ooa = vec4f_from_scalar(gallivm, dx01_ooa, "dx01_ooa_4f"); + args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); + args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); - args->x0_center = vec4f_from_scalar(gallivm, x0_center, "x0_center_4f"); - args->y0_center = vec4f_from_scalar(gallivm, y0_center, "y0_center_4f"); + args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); + args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); + + args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); + args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); + + /* might want to merge that with other coef emit in the future */ + emit_position_coef(gallivm, args, 0, attr_pos); } /** @@ -714,18 +664,18 @@ init_args(struct gallivm_state *gallivm, * */ static struct lp_setup_variant * -generate_setup_variant(struct gallivm_state *gallivm, - struct lp_setup_variant_key *key, +generate_setup_variant(struct lp_setup_variant_key *key, struct llvmpipe_context *lp) { struct lp_setup_variant *variant = NULL; + struct gallivm_state *gallivm; struct lp_setup_args args; char func_name[256]; LLVMTypeRef vec4f_type; LLVMTypeRef func_type; LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; - LLVMBuilderRef builder = gallivm->builder; + LLVMBuilderRef builder; int64_t t0 = 0, t1; if (0) @@ -735,6 +685,13 @@ generate_setup_variant(struct gallivm_state *gallivm, if (variant == NULL) goto fail; + variant->gallivm = gallivm = gallivm_create(); + if (!variant->gallivm) { + goto fail; + } + + builder = gallivm->builder; + if (LP_DEBUG & DEBUG_COUNTERS) { t0 = os_time_get(); } @@ -793,14 +750,17 @@ generate_setup_variant(struct gallivm_state *gallivm, LLVMPositionBuilderAtEnd(builder, block); set_noalias(builder, variant->function, arg_types, Elements(arg_types)); - init_args(gallivm, &args, 
variant); + init_args(gallivm, &variant->key, &args); emit_tri_coef(gallivm, &variant->key, &args); - lp_emit_emms(gallivm); LLVMBuildRetVoid(builder); - variant->jit_function = finalize_function(gallivm, builder, - variant->function); + gallivm_verify_function(gallivm, variant->function); + + gallivm_compile_module(gallivm); + + variant->jit_function = (lp_jit_setup_triangle) + gallivm_jit_function(gallivm, variant->function); if (!variant->jit_function) goto fail; @@ -818,10 +778,12 @@ generate_setup_variant(struct gallivm_state *gallivm, fail: if (variant) { if (variant->function) { - if (variant->jit_function) - LLVMFreeMachineCodeForFunction(gallivm->engine, - variant->function); - LLVMDeleteFunction(variant->function); + gallivm_free_function(gallivm, + variant->function, + variant->jit_function); + } + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); } FREE(variant); } @@ -882,10 +844,13 @@ remove_setup_variant(struct llvmpipe_context *lp, } if (variant->function) { - if (variant->jit_function) - LLVMFreeMachineCodeForFunction(lp->gallivm->engine, - variant->function); - LLVMDeleteFunction(variant->function); + gallivm_free_function(variant->gallivm, + variant->function, + variant->jit_function); + } + + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); } remove_from_list(&variant->list_item_global); @@ -954,7 +919,7 @@ llvmpipe_update_setup(struct llvmpipe_context *lp) cull_setup_variants(lp); } - variant = generate_setup_variant(lp->gallivm, key, lp); + variant = generate_setup_variant(key, lp); if (variant) { insert_at_head(&lp->setup_variants_list, &variant->list_item_global); lp->nr_setup_variants++; diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h index 609c4f62511..e0abe467a6d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -55,6 +55,8 @@ struct lp_setup_variant { struct lp_setup_variant_list_item 
list_item_global; + struct gallivm_state *gallivm; + /* XXX: this is a pointer to the LLVM IR. Once jit_function is * generated, we never need to use the IR again - need to find a * way to release this data without destroying the generated @@ -69,15 +71,6 @@ struct lp_setup_variant { unsigned no; }; -void lp_setup_tri_fallback( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean front_facing, - float (*a0)[4], - float (*dadx)[4], - float (*dady)[4], - const struct lp_setup_variant_key *key ); - void lp_delete_setup_variants(struct llvmpipe_context *lp); void diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index c64f3e149fd..4b6c8a7a6a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -42,11 +42,6 @@ #include <float.h> #include "gallivm/lp_bld.h" -#include <llvm-c/Analysis.h> -#include <llvm-c/ExecutionEngine.h> -#include <llvm-c/Target.h> -#include <llvm-c/BitWriter.h> -#include <llvm-c/Transforms/Scalar.h> #include "pipe/p_state.h" #include "util/u_format.h" @@ -64,14 +59,14 @@ write_tsv_header(FILE *fp); boolean -test_some(struct gallivm_state *gallivm,unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n); boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp); +test_single(unsigned verbose, FILE *fp); boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp); +test_all(unsigned verbose, FILE *fp); #if defined(PIPE_CC_MSVC) diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c index 45ca32f5866..6e09f7e67b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -53,7 +53,7 @@ write_tsv_header(FILE *fp) } -typedef float (*unary_func_t)(float); +typedef void (*unary_func_t)(float *out, const float *in); /** @@ -180,6 +180,45 @@ const float sincos_values[] = { 5*M_PI/4, 
}; +const float round_values[] = { + -10.0, -1, 0.0, 12.0, + -1.49, -0.25, 1.25, 2.51, + -0.99, -0.01, 0.01, 0.99, +}; + +static float fractf(float x) +{ + x -= floorf(x); + if (x >= 1.0f) { + // clamp to the largest number smaller than one + x = 1.0f - 0.5f*FLT_EPSILON; + } + return x; +} + + +const float fract_values[] = { + // http://en.wikipedia.org/wiki/IEEE_754-1985#Examples + 0.0f, + -0.0f, + 1.0f, + -1.0f, + 0.5f, + -0.5f, + 1.401298464324817e-45f, // smallest denormal + -1.401298464324817e-45f, + 5.88e-39f, // middle denormal + 1.18e-38f, // largest denormal + -1.18e-38f, + -1.62981451e-08f, + FLT_EPSILON, + -FLT_EPSILON, + 1.0f - 0.5f*FLT_EPSILON, + -1.0f + FLT_EPSILON, + FLT_MAX, + -FLT_MAX +}; + /* * Unary test cases. @@ -196,6 +235,11 @@ unary_tests[] = { {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values), 20.0 }, {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values), 20.0 }, {"sgn", &lp_build_sgn, &sgnf, exp2_values, Elements(exp2_values), 20.0 }, + {"round", &lp_build_round, &roundf, round_values, Elements(round_values), 24.0 }, + {"trunc", &lp_build_trunc, &truncf, round_values, Elements(round_values), 24.0 }, + {"floor", &lp_build_floor, &floorf, round_values, Elements(round_values), 24.0 }, + {"ceil", &lp_build_ceil, &ceilf, round_values, Elements(round_values), 24.0 }, + {"fract", &lp_build_fract_safe, &fractf, fract_values, Elements(fract_values), 24.0 }, }; @@ -204,39 +248,40 @@ unary_tests[] = { */ static LLVMValueRef build_unary_test_func(struct gallivm_state *gallivm, - LLVMModuleRef module, - LLVMContextRef context, const struct unary_test_t *test) { - struct lp_type type = lp_type_float_vec(32); - LLVMTypeRef i32t = LLVMInt32TypeInContext(context); - LLVMTypeRef f32t = LLVMFloatTypeInContext(context); + struct lp_type type = lp_type_float_vec(32, lp_native_vector_width); + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; LLVMTypeRef vf32t = lp_build_vec_type(gallivm, 
type); - LLVMTypeRef args[1] = { f32t }; - LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); - LLVMValueRef arg1 = LLVMGetParam(func, 0); + LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) }; + LLVMValueRef func = LLVMAddFunction(module, test->name, + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, Elements(args), 0)); + LLVMValueRef arg0 = LLVMGetParam(func, 0); + LLVMValueRef arg1 = LLVMGetParam(func, 1); LLVMBuilderRef builder = gallivm->builder; LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); - LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); LLVMValueRef ret; struct lp_build_context bld; - lp_build_context_init(&bld, gallivm, lp_type_float_vec(32)); + lp_build_context_init(&bld, gallivm, type); LLVMSetFunctionCallConv(func, LLVMCCallConv); LLVMPositionBuilderAtEnd(builder, block); - /* scalar to vector */ - arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(vf32t), arg1, index0, ""); + arg1 = LLVMBuildLoad(builder, arg1, ""); ret = test->builder(&bld, arg1); - /* vector to scalar */ - ret = LLVMBuildExtractElement(builder, ret, index0, ""); + LLVMBuildStore(builder, ret, arg0); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, func); - LLVMBuildRet(builder, ret); return func; } @@ -245,67 +290,86 @@ build_unary_test_func(struct gallivm_state *gallivm, * Test one LLVM unary arithmetic builder function. 
*/ static boolean -test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test) { - LLVMModuleRef module = gallivm->module; + struct gallivm_state *gallivm; LLVMValueRef test_func; - LLVMExecutionEngineRef engine = gallivm->engine; - LLVMContextRef context = gallivm->context; - char *error = NULL; unary_func_t test_func_jit; boolean success = TRUE; - int i; + int i, j; + int length = lp_native_vector_width / 32; + float *in, *out; - test_func = build_unary_test_func(gallivm, module, context, test); + in = align_malloc(length * 4, length * 4); + out = align_malloc(length * 4, length * 4); - if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - printf("LLVMVerifyModule: %s\n", error); - LLVMDumpModule(module); - abort(); + /* random NaNs or 0s could wreak havoc */ + for (i = 0; i < length; i++) { + in[i] = 1.0; } - LLVMDisposeMessage(error); - test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + gallivm = gallivm_create(); - for (i = 0; i < test->num_values; ++i) { - float value = test->values[i]; - float ref = test->ref(value); - float src = test_func_jit(value); + test_func = build_unary_test_func(gallivm, test); - double error = fabs(src - ref); - double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + gallivm_compile_module(gallivm); - bool pass = precision >= test->precision; + test_func_jit = (unary_func_t) gallivm_jit_function(gallivm, test_func); - if (isnan(ref)) { - continue; - } + for (j = 0; j < (test->num_values + length - 1) / length; j++) { + int num_vals = ((j + 1) * length <= test->num_values) ? length : + test->num_values % length; - if (!pass || verbose) { - printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", - test->name, value, ref, src, precision, - pass ? 
"PASS" : "FAIL"); + for (i = 0; i < num_vals; ++i) { + in[i] = test->values[i+j*length]; } - if (!pass) { - success = FALSE; + test_func_jit(out, in); + for (i = 0; i < num_vals; ++i) { + float ref = test->ref(in[i]); + double error, precision; + bool pass; + + error = fabs(out[i] - ref); + precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + pass = precision >= test->precision; + + if (isnan(ref)) { + continue; + } + + if (!pass || verbose) { + printf("%s(%.9g): ref = %.9g, out = %.9g, precision = %f bits, %s\n", + test->name, in[i], ref, out[i], precision, + pass ? "PASS" : "FAIL"); + } + + if (!pass) { + success = FALSE; + } } } - LLVMFreeMachineCodeForFunction(engine, test_func); + gallivm_free_function(gallivm, test_func, test_func_jit); + + gallivm_destroy(gallivm); + + align_free(in); + align_free(out); return success; } boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_all(unsigned verbose, FILE *fp) { boolean success = TRUE; int i; for (i = 0; i < Elements(unary_tests); ++i) { - if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { + if (!test_unary(verbose, fp, &unary_tests[i])) { success = FALSE; } } @@ -315,19 +379,19 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { /* * Not randomly generated test cases, so test all. 
*/ - return test_all(gallivm, verbose, fp); + return test_all(verbose, fp); } boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { return TRUE; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 51324cbb6a3..37b37fda40e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -36,6 +36,7 @@ * @author Brian Paul <brian@vmware.com> */ +#include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" @@ -53,19 +54,6 @@ enum vector_mode typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res); -/** cast wrapper */ -static blend_test_ptr_t -voidptr_to_blend_test_ptr_t(void *p) -{ - union { - void *v; - blend_test_ptr_t f; - } u; - u.v = p; - return u.f; -} - - void write_tsv_header(FILE *fp) @@ -468,50 +456,43 @@ compute_blend_ref(const struct pipe_blend_state *blend, PIPE_ALIGN_STACK static boolean -test_one(struct gallivm_state *gallivm, - unsigned verbose, +test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { - LLVMModuleRef module = gallivm->module; + struct gallivm_state *gallivm; LLVMValueRef func = NULL; - LLVMExecutionEngineRef engine = gallivm->engine; - char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; - void *code; + const unsigned stride = lp_type_width(type)/8; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); - func = add_blend_test(gallivm, blend, mode, type); + gallivm = gallivm_create(); - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); + func = add_blend_test(gallivm, blend, mode, type); - code = 
LLVMGetPointerToGlobal(engine, func); - blend_test_ptr = voidptr_to_blend_test_ptr_t(code); + gallivm_compile_module(gallivm); - if(verbose >= 2) - lp_disassemble(code); + blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func); success = TRUE; - for(i = 0; i < n && success; ++i) { - if(mode == AoS) { - PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + if(mode == AoS) { + uint8_t *src, *dst, *con, *res, *ref; + src = align_malloc(stride, stride); + dst = align_malloc(stride, stride); + con = align_malloc(stride, stride); + res = align_malloc(stride, stride); + ref = align_malloc(stride, stride); + + for(i = 0; i < n && success; ++i) { int64_t start_counter = 0; int64_t end_counter = 0; @@ -569,14 +550,21 @@ test_one(struct gallivm_state *gallivm, fprintf(stderr, "\n"); } } - - if(mode == SoA) { - const unsigned stride = type.length*type.width/8; - PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + align_free(src); + align_free(dst); + align_free(con); + align_free(res); + align_free(ref); + } + else if(mode == SoA) { + uint8_t *src, *dst, *con, *res, *ref; + src = align_malloc(4*stride, stride); + dst = align_malloc(4*stride, stride); + con = align_malloc(4*stride, stride); + res = align_malloc(4*stride, stride); + ref = align_malloc(4*stride, stride); + + for(i = 0; i < n && success; ++i) { int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; @@ -651,6 +639,11 @@ test_one(struct gallivm_state *gallivm, } } } + align_free(src); + 
align_free(dst); + align_free(con); + align_free(res); + align_free(ref); } /* @@ -687,16 +680,9 @@ test_one(struct gallivm_state *gallivm, if(fp) write_tsv_row(fp, blend, mode, type, cycles_avg, success); - if (!success) { - if(verbose < 2) - LLVMDumpModule(module); - LLVMWriteBitcodeToFile(module, "blend.bc"); - fprintf(stderr, "blend.bc written\n"); - fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); - abort(); - } + gallivm_free_function(gallivm, func, blend_test_ptr); - LLVMFreeMachineCodeForFunction(engine, func); + gallivm_destroy(gallivm); return success; } @@ -753,7 +739,7 @@ const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_all(unsigned verbose, FILE *fp) { const unsigned *rgb_func; const unsigned *rgb_src_factor; @@ -789,7 +775,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) blend.rt[0].alpha_dst_factor = *alpha_dst_factor; blend.rt[0].colormask = PIPE_MASK_RGBA; - if(!test_one(gallivm, verbose, fp, &blend, mode, *type)) + if(!test_one(verbose, fp, &blend, mode, *type)) success = FALSE; } @@ -806,7 +792,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { const unsigned *rgb_func; @@ -849,7 +835,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, blend.rt[0].alpha_dst_factor = *alpha_dst_factor; blend.rt[0].colormask = PIPE_MASK_RGBA; - if(!test_one(gallivm, verbose, fp, &blend, mode, *type)) + if(!test_one(verbose, fp, &blend, mode, *type)) success = FALSE; } @@ -858,7 +844,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git 
a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 0dcb5422887..71d45bd5ce7 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,21 +142,21 @@ add_conv_test(struct gallivm_state *gallivm, LLVMBuildRetVoid(builder);; + gallivm_verify_function(gallivm, func); + return func; } PIPE_ALIGN_STACK static boolean -test_one(struct gallivm_state *gallivm, unsigned verbose, +test_one(unsigned verbose, FILE *fp, struct lp_type src_type, struct lp_type dst_type) { - LLVMModuleRef module = gallivm->module; - LLVMExecutionEngineRef engine = gallivm->engine; + struct gallivm_state *gallivm; LLVMValueRef func = NULL; - char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; @@ -166,10 +166,18 @@ test_one(struct gallivm_state *gallivm, unsigned verbose, unsigned num_dsts; double eps; unsigned i, j; - void *code; - if (src_type.width * src_type.length != dst_type.width * dst_type.length && - src_type.length != dst_type.length) { + if ((src_type.width >= dst_type.width && src_type.length > dst_type.length) || + (src_type.width <= dst_type.width && src_type.length < dst_type.length)) { + return TRUE; + } + + /* Known failures + * - fixed point 32 -> float 32 + * - float 32 -> signed normalised integer 32 + */ + if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) || + (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) { return TRUE; } @@ -183,7 +191,7 @@ test_one(struct gallivm_state *gallivm, unsigned verbose, } if(verbose >= 1) - dump_conv_types(stdout, src_type, dst_type); + dump_conv_types(stderr, src_type, dst_type); if (src_type.length > dst_type.length) { num_srcs = 1; @@ -203,29 +211,20 @@ test_one(struct gallivm_state *gallivm, unsigned verbose, eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); - func 
= add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts); + gallivm = gallivm_create(); - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); - - if(verbose >= 2) - LLVMDumpModule(module); + func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts); - code = LLVMGetPointerToGlobal(engine, func); - conv_test_ptr = (conv_test_ptr_t)pointer_to_func(code); + gallivm_compile_module(gallivm); - if(verbose >= 2) - lp_disassemble(code); + conv_test_ptr = (conv_test_ptr_t)gallivm_jit_function(gallivm, func); success = TRUE; for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; @@ -320,20 +319,9 @@ test_one(struct gallivm_state *gallivm, unsigned verbose, if(fp) write_tsv_row(fp, src_type, dst_type, cycles_avg, success); - if (!success) { - static boolean firsttime = TRUE; - if(firsttime) { - if(verbose < 2) - LLVMDumpModule(module); - LLVMWriteBitcodeToFile(module, "conv.bc"); - fprintf(stderr, "conv.bc written\n"); - fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); - firsttime = FALSE; - /* abort(); */ - } - } + gallivm_free_function(gallivm, func, conv_test_ptr); - LLVMFreeMachineCodeForFunction(engine, func); + gallivm_destroy(gallivm); return success; } @@ -348,18 +336,33 @@ const struct lp_type conv_types[] = { { TRUE, FALSE, FALSE, TRUE, 32, 4 }, { TRUE, FALSE, FALSE, FALSE, 32, 4 }, + { TRUE, FALSE, 
TRUE, TRUE, 32, 8 }, + { TRUE, FALSE, TRUE, FALSE, 32, 8 }, + { TRUE, FALSE, FALSE, TRUE, 32, 8 }, + { TRUE, FALSE, FALSE, FALSE, 32, 8 }, + /* Fixed */ { FALSE, TRUE, TRUE, TRUE, 32, 4 }, { FALSE, TRUE, TRUE, FALSE, 32, 4 }, { FALSE, TRUE, FALSE, TRUE, 32, 4 }, { FALSE, TRUE, FALSE, FALSE, 32, 4 }, + { FALSE, TRUE, TRUE, TRUE, 32, 8 }, + { FALSE, TRUE, TRUE, FALSE, 32, 8 }, + { FALSE, TRUE, FALSE, TRUE, 32, 8 }, + { FALSE, TRUE, FALSE, FALSE, 32, 8 }, + /* Integer */ { FALSE, FALSE, TRUE, TRUE, 32, 4 }, { FALSE, FALSE, TRUE, FALSE, 32, 4 }, { FALSE, FALSE, FALSE, TRUE, 32, 4 }, { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + { FALSE, FALSE, TRUE, TRUE, 32, 8 }, + { FALSE, FALSE, TRUE, FALSE, 32, 8 }, + { FALSE, FALSE, FALSE, TRUE, 32, 8 }, + { FALSE, FALSE, FALSE, FALSE, 32, 8 }, + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, { FALSE, FALSE, TRUE, FALSE, 16, 8 }, { FALSE, FALSE, FALSE, TRUE, 16, 8 }, @@ -381,7 +384,7 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_all(unsigned verbose, FILE *fp) { const struct lp_type *src_type; const struct lp_type *dst_type; @@ -394,7 +397,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) if(src_type == dst_type) continue; - if(!test_one(gallivm, verbose, fp, *src_type, *dst_type)){ + if(!test_one(verbose, fp, *src_type, *dst_type)){ success = FALSE; ++error_count; } @@ -408,7 +411,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { const struct lp_type *src_type; @@ -423,7 +426,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, dst_type = &conv_types[rand() % num_types]; } while (src_type == dst_type || src_type->norm != dst_type->norm); - if(!test_one(gallivm, verbose, fp, *src_type, *dst_type)) + if(!test_one(verbose, fp, *src_type, 
*dst_type)) success = FALSE; } @@ -432,7 +435,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { /* float, fixed, sign, norm, width, len */ struct lp_type f32x4_type = @@ -442,7 +445,7 @@ test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean success; - success = test_one(gallivm, verbose, fp, f32x4_type, ub8x4_type); + success = test_one(verbose, fp, f32x4_type, ub8x4_type); return success; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index daf6ded29c7..34cbdbdd630 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -83,7 +83,6 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMContextRef context = gallivm->context; LLVMModuleRef module = gallivm->module; LLVMBuilderRef builder = gallivm->builder; - LLVMPassManagerRef passmgr = gallivm->passmgr; LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef packed_ptr; @@ -120,16 +119,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, LLVMBuildRetVoid(builder); - if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) { - LLVMDumpValue(func); - abort(); - } - - LLVMRunFunctionPassManager(passmgr, func); - - if (verbose >= 1) { - LLVMDumpValue(func); - } + gallivm_verify_function(gallivm, func); return func; } @@ -137,26 +127,24 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, PIPE_ALIGN_STACK static boolean -test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_format_float(unsigned verbose, FILE *fp, const struct util_format_description *desc) { + struct gallivm_state *gallivm; LLVMValueRef fetch = NULL; - LLVMExecutionEngineRef engine = gallivm->engine; fetch_ptr_t fetch_ptr; PIPE_ALIGN_VAR(16) float unpacked[4]; boolean first = TRUE; 
boolean success = TRUE; unsigned i, j, k, l; - void *f; + + gallivm = gallivm_create(); fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type()); - f = LLVMGetPointerToGlobal(engine, fetch); - fetch_ptr = (fetch_ptr_t) pointer_to_func(f); + gallivm_compile_module(gallivm); - if (verbose >= 2) { - lp_disassemble(f); - } + fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch); for (l = 0; l < util_format_nr_test_cases; ++l) { const struct util_format_test_case *test = &util_format_test_cases[l]; @@ -171,25 +159,35 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, for (i = 0; i < desc->block.height; ++i) { for (j = 0; j < desc->block.width; ++j) { - boolean match; + boolean match = TRUE; memset(unpacked, 0, sizeof unpacked); fetch_ptr(unpacked, test->packed, j, i); - match = TRUE; - for(k = 0; k < 4; ++k) - if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) + for(k = 0; k < 4; ++k) { + if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { match = FALSE; + } + + if (util_is_double_nan(test->unpacked[i][j][k]) != util_is_nan(unpacked[k])) { + match = FALSE; + } + + if (!util_is_double_inf_or_nan(test->unpacked[i][j][k]) && + fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) { + match = FALSE; + } + } if (!match) { printf("FAILED\n"); printf(" Packed: %02x %02x %02x %02x\n", test->packed[0], test->packed[1], test->packed[2], test->packed[3]); - printf(" Unpacked (%u,%u): %f %f %f %f obtained\n", + printf(" Unpacked (%u,%u): %.9g %.9g %.9g %.9g obtained\n", j, i, unpacked[0], unpacked[1], unpacked[2], unpacked[3]); - printf(" %f %f %f %f expected\n", + printf(" %.9g %.9g %.9g %.9g expected\n", test->unpacked[i][j][0], test->unpacked[i][j][1], test->unpacked[i][j][2], @@ -201,14 +199,9 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, } } - if (!success) { - if (verbose < 1) { - LLVMDumpValue(fetch); - } - } + 
gallivm_free_function(gallivm, fetch, fetch_ptr); - LLVMFreeMachineCodeForFunction(engine, fetch); - LLVMDeleteFunction(fetch); + gallivm_destroy(gallivm); if(fp) write_tsv_row(fp, desc, success); @@ -219,26 +212,24 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, PIPE_ALIGN_STACK static boolean -test_format_unorm8(struct gallivm_state *gallivm, - unsigned verbose, FILE *fp, +test_format_unorm8(unsigned verbose, FILE *fp, const struct util_format_description *desc) { + struct gallivm_state *gallivm; LLVMValueRef fetch = NULL; fetch_ptr_t fetch_ptr; uint8_t unpacked[4]; boolean first = TRUE; boolean success = TRUE; unsigned i, j, k, l; - void *f; + + gallivm = gallivm_create(); fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type()); - f = LLVMGetPointerToGlobal(gallivm->engine, fetch); - fetch_ptr = (fetch_ptr_t) pointer_to_func(f); + gallivm_compile_module(gallivm); - if (verbose >= 2) { - lp_disassemble(f); - } + fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch); for (l = 0; l < util_format_nr_test_cases; ++l) { const struct util_format_test_case *test = &util_format_test_cases[l]; @@ -285,6 +276,7 @@ test_format_unorm8(struct gallivm_state *gallivm, float_to_ubyte(test->unpacked[i][j][1]), float_to_ubyte(test->unpacked[i][j][2]), float_to_ubyte(test->unpacked[i][j][3])); + success = FALSE; } } @@ -292,11 +284,9 @@ test_format_unorm8(struct gallivm_state *gallivm, } } - if (!success) - LLVMDumpValue(fetch); + gallivm_free_function(gallivm, fetch, fetch_ptr); - LLVMFreeMachineCodeForFunction(gallivm->engine, fetch); - LLVMDeleteFunction(fetch); + gallivm_destroy(gallivm); if(fp) write_tsv_row(fp, desc, success); @@ -308,17 +298,16 @@ test_format_unorm8(struct gallivm_state *gallivm, static boolean -test_one(struct gallivm_state *gallivm, - unsigned verbose, FILE *fp, +test_one(unsigned verbose, FILE *fp, const struct util_format_description *format_desc) { boolean success = TRUE; - if 
(!test_format_float(gallivm, verbose, fp, format_desc)) { + if (!test_format_float(verbose, fp, format_desc)) { success = FALSE; } - if (!test_format_unorm8(gallivm, verbose, fp, format_desc)) { + if (!test_format_unorm8(verbose, fp, format_desc)) { success = FALSE; } @@ -327,7 +316,7 @@ test_one(struct gallivm_state *gallivm, boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_all(unsigned verbose, FILE *fp) { enum pipe_format format; boolean success = TRUE; @@ -359,7 +348,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) continue; } - if (!test_one(gallivm, verbose, fp, format_desc)) { + if (!test_one(verbose, fp, format_desc)) { success = FALSE; } } @@ -369,15 +358,15 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { - return test_all(gallivm, verbose, fp); + return test_all(verbose, fp); } boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index d229c620310..4c610923146 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -39,6 +39,7 @@ #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_debug.h" #include "lp_test.h" @@ -369,7 +370,6 @@ int main(int argc, char **argv) unsigned i; boolean success; boolean single = FALSE; - struct gallivm_state *gallivm; for(i = 1; i < argc; ++i) { if(strcmp(argv[i], "-v") == 0) @@ -384,23 +384,28 @@ int main(int argc, char **argv) lp_build_init(); - gallivm = gallivm_create(); +#ifdef DEBUG + if (verbose >= 2) { + gallivm_debug |= GALLIVM_DEBUG_IR; + gallivm_debug |= GALLIVM_DEBUG_ASM; + } +#endif 
util_cpu_detect(); if(fp) { /* Warm up the caches */ - test_some(gallivm, 0, NULL, 100); + test_some(0, NULL, 100); write_tsv_header(fp); } if (single) - success = test_single(gallivm, verbose, fp); + success = test_single(verbose, fp); else if (n) - success = test_some(gallivm, verbose, fp, n); + success = test_some(verbose, fp, n); else - success = test_all(gallivm, verbose, fp); + success = test_all(verbose, fp); if(fp) fclose(fp); diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c index 620cdb57c13..c483de94d40 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_printf.c +++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -78,66 +78,61 @@ add_printf_test(struct gallivm_state *gallivm) LLVMBuildRetVoid(builder); + gallivm_verify_function(gallivm, func); + return func; } PIPE_ALIGN_STACK static boolean -test_printf(struct gallivm_state *gallivm, - unsigned verbose, FILE *fp, +test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase) { - LLVMExecutionEngineRef engine = gallivm->engine; - LLVMModuleRef module = gallivm->module; + struct gallivm_state *gallivm; LLVMValueRef test; - char *error = NULL; test_printf_t test_printf_func; boolean success = TRUE; - void *code; - test = add_printf_test(gallivm); + gallivm = gallivm_create(); - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); + test = add_printf_test(gallivm); - code = LLVMGetPointerToGlobal(engine, test); - test_printf_func = (test_printf_t) pointer_to_func(code); + gallivm_compile_module(gallivm); - // LLVMDumpModule(module); + test_printf_func = (test_printf_t) gallivm_jit_function(gallivm, test); test_printf_func(0); - LLVMFreeMachineCodeForFunction(engine, test); + gallivm_free_function(gallivm, test, test_printf_func); + + gallivm_destroy(gallivm); return success; } boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) 
+test_all(unsigned verbose, FILE *fp) { boolean success = TRUE; - test_printf(gallivm, verbose, fp, NULL); + test_printf(verbose, fp, NULL); return success; } boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { - return test_all(gallivm, verbose, fp); + return test_all(verbose, fp); } boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { printf("no test_single()"); return TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c deleted file mode 100644 index fc3edf372d5..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ /dev/null @@ -1,242 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdlib.h> -#include <stdio.h> - -#include "util/u_pointer.h" -#include "gallivm/lp_bld.h" -#include "gallivm/lp_bld_init.h" -#include "gallivm/lp_bld_arit.h" - -#include "lp_test.h" - - -void -write_tsv_header(FILE *fp) -{ - fprintf(fp, - "result\t" - "format\n"); - - fflush(fp); -} - - -#ifdef PIPE_ARCH_SSE - -# include <emmintrin.h> - -typedef __m128 (*test_round_t)(__m128); - -typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef); - - -static LLVMValueRef -add_test(struct gallivm_state *gallivm, const char *name, lp_func_t lp_func) -{ - LLVMModuleRef module = gallivm->module; - LLVMContextRef context = gallivm->context; - LLVMBuilderRef builder = gallivm->builder; - - LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4); - LLVMTypeRef args[1] = { v4sf }; - LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0)); - LLVMValueRef arg1 = LLVMGetParam(func, 0); - LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); - LLVMValueRef ret; - struct lp_build_context bld; - - lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); - - LLVMSetFunctionCallConv(func, LLVMCCallConv); - - LLVMPositionBuilderAtEnd(builder, block); - - ret = lp_func(&bld, arg1); - - LLVMBuildRet(builder, ret); - - return func; -} - -static void -printv(char* string, __m128 value) -{ - __m128 v = value; - float *f = (float *)&v; - printf("%s: %10f %10f %10f %10f\n", string, - f[0], f[1], f[2], f[3]); -} - -static boolean -compare(__m128 x, __m128 y) -{ - boolean success = TRUE; - float *xp = (float *) &x; - float *yp = (float *) &y; - if 
(xp[0] != yp[0] || - xp[1] != yp[1] || - xp[2] != yp[2] || - xp[3] != yp[3]) { - printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); - success = FALSE; - } - return success; -} - - - -PIPE_ALIGN_STACK -static boolean -test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) -{ - LLVMModuleRef module = gallivm->module; - LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil; - LLVMExecutionEngineRef engine = gallivm->engine; - char *error = NULL; - test_round_t round_func, trunc_func, floor_func, ceil_func; - float unpacked[4]; - boolean success = TRUE; - int i; - - test_round = add_test(gallivm, "round", lp_build_round); - test_trunc = add_test(gallivm, "trunc", lp_build_trunc); - test_floor = add_test(gallivm, "floor", lp_build_floor); - test_ceil = add_test(gallivm, "ceil", lp_build_ceil); - - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - printf("LLVMVerifyModule: %s\n", error); - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); - - round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round)); - trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc)); - floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor)); - ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil)); - - memset(unpacked, 0, sizeof unpacked); - - if (0) - LLVMDumpModule(module); - - for (i = 0; i < 3; i++) { - /* NOTE: There are several acceptable rules for x.5 rounding: ceiling, - * nearest even, etc. So we avoid testing such corner cases here. 
- */ - __m128 xvals[3] = { - {-10.0, -1, 0, 12.0}, - {-1.49, -0.25, 1.25, 2.51}, - {-0.99, -0.01, 0.01, 0.99} - }; - __m128 x = xvals[i]; - __m128 y, ref; - float *xp = (float *) &x; - float *refp = (float *) &ref; - - printf("\n"); - printv("x ", x); - - refp[0] = round(xp[0]); - refp[1] = round(xp[1]); - refp[2] = round(xp[2]); - refp[3] = round(xp[3]); - y = round_func(x); - printv("C round(x) ", ref); - printv("LLVM round(x)", y); - success = success && compare(ref, y); - - refp[0] = trunc(xp[0]); - refp[1] = trunc(xp[1]); - refp[2] = trunc(xp[2]); - refp[3] = trunc(xp[3]); - y = trunc_func(x); - printv("C trunc(x) ", ref); - printv("LLVM trunc(x)", y); - success = success && compare(ref, y); - - refp[0] = floor(xp[0]); - refp[1] = floor(xp[1]); - refp[2] = floor(xp[2]); - refp[3] = floor(xp[3]); - y = floor_func(x); - printv("C floor(x) ", ref); - printv("LLVM floor(x)", y); - success = success && compare(ref, y); - - refp[0] = ceil(xp[0]); - refp[1] = ceil(xp[1]); - refp[2] = ceil(xp[2]); - refp[3] = ceil(xp[3]); - y = ceil_func(x); - printv("C ceil(x) ", ref); - printv("LLVM ceil(x) ", y); - success = success && compare(ref, y); - } - - LLVMFreeMachineCodeForFunction(engine, test_round); - LLVMFreeMachineCodeForFunction(engine, test_trunc); - LLVMFreeMachineCodeForFunction(engine, test_floor); - LLVMFreeMachineCodeForFunction(engine, test_ceil); - - return success; -} - -#else /* !PIPE_ARCH_SSE */ - -static boolean -test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) -{ - return TRUE; -} - -#endif /* !PIPE_ARCH_SSE */ - - -boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) -{ - return test_round(gallivm, verbose, fp); -} - - -boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, - unsigned long n) -{ - return test_all(gallivm, verbose, fp); -} - -boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) -{ - printf("no test_single()"); - return TRUE; -} diff --git 
a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index daa96f20c7e..9151e427ba7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -178,8 +178,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - const LLVMValueRef *ddx, - const LLVMValueRef *ddy, + const struct lp_derivatives *derivs, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel) @@ -189,7 +188,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, assert(unit < PIPE_MAX_SAMPLERS); if (LP_PERF & PERF_NO_TEX) { - lp_build_sample_nop(gallivm, type, texel); + lp_build_sample_nop(gallivm, type, num_coords, coords, texel); return; } @@ -199,7 +198,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, type, unit, num_coords, coords, - ddx, ddy, + derivs, lod_bias, explicit_lod, texel); } @@ -210,6 +209,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, static void lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, struct gallivm_state *gallivm, + struct lp_type type, unsigned unit, LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *sizes_out) @@ -221,6 +221,7 @@ lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, lp_build_size_query_soa(gallivm, &sampler->dynamic_state.static_state[unit], &sampler->dynamic_state.base, + type, unit, explicit_lod, sizes_out); |