summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore1
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile3
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c115
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c429
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c48
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_memory.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c162
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c487
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_arit.c166
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c102
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c87
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_printf.c37
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_round.c242
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.c9
27 files changed, 1058 insertions, 1019 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
index f6973b54d2c..21cd3cf2ed2 100644
--- a/src/gallium/drivers/llvmpipe/.gitignore
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -4,4 +4,3 @@ lp_test_blend
lp_test_conv
lp_test_format
lp_test_printf
-lp_test_round
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 26fbde9a169..ef16fc7d882 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -55,8 +55,7 @@ PROGS := lp_test_format \
lp_test_arit \
lp_test_blend \
lp_test_conv \
- lp_test_printf \
- lp_test_round
+ lp_test_printf
# Need this for the lp_test_*.o files
CLEAN_EXTRA = *.o
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 85560a1c716..cea44a78679 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -94,7 +94,6 @@ if not env['embedded']:
if not env['msvc']:
tests.append('arit')
- tests.append('round')
for test in tests:
testname = 'lp_test_' + test
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 87a6a2751d4..8efa75c01d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -59,6 +59,7 @@
#include "pipe/p_state.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
@@ -102,7 +103,16 @@ lp_build_stencil_test_single(struct lp_build_context *bld,
struct lp_type type = bld->type;
LLVMValueRef res;
- assert(type.sign);
+ /*
+ * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
+ * are between 0..255 so ensure we generate the fastest comparisons for
+ * wider elements.
+ */
+ if (type.width <= 8) {
+ assert(!type.sign);
+ } else {
+ assert(type.sign);
+ }
assert(stencil->enabled);
@@ -424,29 +434,86 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
LLVMBuilderRef builder = gallivm->builder;
LLVMContextRef context = gallivm->context;
LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
- LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
- LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16);
- LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
- LLVMValueRef maskarray[4] = {
- lp_build_const_int32(gallivm, 0),
- lp_build_const_int32(gallivm, 4),
- lp_build_const_int32(gallivm, 8),
- lp_build_const_int32(gallivm, 12)
- };
- LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
- LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
- LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle");
- LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle);
- LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
- LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
- LLVMBuildStore(builder, incr, counter);
+ LLVMValueRef count, newcount;
+
+ assert(type.length <= 16);
+ assert(type.floating);
+
+ if(util_cpu_caps.has_sse && type.length == 4) {
+ const char *movmskintr = "llvm.x86.sse.movmsk.ps";
+ const char *popcntintr = "llvm.ctpop.i32";
+ LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
+ lp_build_vec_type(gallivm, type), "");
+ bits = lp_build_intrinsic_unary(builder, movmskintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = lp_build_intrinsic_unary(builder, popcntintr,
+ LLVMInt32TypeInContext(context), bits);
+ }
+ else if(util_cpu_caps.has_avx && type.length == 8) {
+ const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
+ const char *popcntintr = "llvm.ctpop.i32";
+ LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
+ lp_build_vec_type(gallivm, type), "");
+ bits = lp_build_intrinsic_unary(builder, movmskintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = lp_build_intrinsic_unary(builder, popcntintr,
+ LLVMInt32TypeInContext(context), bits);
+ }
+ else {
+ unsigned i;
+ LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
+ LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
+ LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
+ LLVMValueRef shufflev, countd;
+ LLVMValueRef shuffles[16];
+ const char *popcntintr = NULL;
+
+ countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
+
+ for (i = 0; i < type.length; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, 4*i);
+ }
+
+ shufflev = LLVMConstVector(shuffles, type.length);
+ countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
+ countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
+
+ /*
+ * XXX FIXME
+ * this is bad on cpus without popcount (on x86 supported by intel
+ * nehalem, amd barcelona, and up - not tied to sse42).
+ * Would be much faster to just sum the 4 elements of the vector with
+ * some horizontal add (shuffle/add/shuffle/add after the initial and).
+ */
+ switch (type.length) {
+ case 4:
+ popcntintr = "llvm.ctpop.i32";
+ break;
+ case 8:
+ popcntintr = "llvm.ctpop.i64";
+ break;
+ case 16:
+ popcntintr = "llvm.ctpop.i128";
+ break;
+ default:
+ assert(0);
+ }
+ count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
+
+ if (type.length > 4) {
+ count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
+ }
+ }
+ newcount = LLVMBuildLoad(builder, counter, "origcount");
+ newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
+ LLVMBuildStore(builder, newcount, counter);
}
/**
* Generate code for performing depth and/or stencil tests.
- * We operate on a vector of values (typically a 2x2 quad).
+ * We operate on a vector of values (typically n 2x2 quads).
*
* \param depth the depth test state
* \param stencil the front/back stencil state
@@ -454,9 +521,9 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
* \param format_desc description of the depth/stencil surface
* \param mask the alive/dead pixel mask for the quad (vector)
* \param stencil_refs the front/back stencil ref values (scalar)
- * \param z_src the incoming depth/stencil values (a 2x2 quad, float32)
+ * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
* \param zs_dst_ptr pointer to depth/stencil values in framebuffer
- * \param facing contains boolean value indicating front/back facing polygon
+ * \param face contains boolean value indicating front/back facing polygon
*/
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
@@ -507,6 +574,12 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
assert(z_type.width == z_src_type.width);
assert(z_type.length == z_src_type.length);
+ /* FIXME: for non-float depth/stencil might generate better code
+ * if we'd always split it up to use 128bit operations.
+ * For stencil we'd almost certainly want to pack to 8xi16 values,
+ * for z just run twice.
+ */
+
/* Sanity checking */
{
const unsigned z_swizzle = format_desc->swizzle[0];
@@ -548,7 +621,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
lp_build_context_init(&z_bld, gallivm, z_type);
/* Setup build context for stencil vals */
- s_type = lp_type_int_vec(z_type.width);
+ s_type = lp_int_type(z_type);
lp_build_context_init(&s_bld, gallivm, s_type);
/* Load current z/stencil value from z/stencil buffer */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 0d51ccb0349..d108f35f719 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -61,6 +61,9 @@
* # | # | #
* #################
*
+ * If we iterate over multiple quads at once, quads 01 and 23 are processed
+ * together.
+ *
* Within each quad, we have four pixels which are represented in SOA
* order:
*
@@ -72,6 +75,10 @@
*
* So the green channel (for example) of the four pixels is stored in
* a single vector register: {g0, g1, g2, g3}.
+ * The order stays the same even with multiple quads:
+ * 0 1 4 5
+ * 2 3 6 7
+ * is stored as g0..g7
*/
@@ -102,8 +109,8 @@
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
-static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
-static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
+static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
+static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
static void
@@ -115,132 +122,353 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
}
-
-/**
- * Initialize the bld->a0, dadx, dady fields. This involves fetching
- * those values from the arrays which are passed into the JIT function.
+/* Much easier, and significantly fewer instructions in the per-stamp
+ * part (less than half) but overall more instructions so a loss if
+ * most quads are active. Might be a win though with larger vectors.
+ * No ability to do per-quad divide (doable but not implemented)
+ * Could be made to work with passed in pixel offsets (i.e. active quad merging).
*/
static void
-coeffs_init(struct lp_build_interp_soa_context *bld,
- LLVMValueRef a0_ptr,
- LLVMValueRef dadx_ptr,
- LLVMValueRef dady_ptr)
+coeffs_init_simple(struct lp_build_interp_soa_context *bld,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
+ struct lp_build_context *setup_bld = &bld->setup_bld;
struct gallivm_state *gallivm = coeff_bld->gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
- LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
- LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
- LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
- LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
- LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
unsigned attrib;
- unsigned chan;
-
- /* TODO: Use more vector operations */
for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ /*
+ * always fetch all 4 values for performance/simplicity
+ * Note: we do that here because it seems to generate better
+ * code. It generates a lot of moves initially but less
+ * moves later. As far as I can tell this looks like a
+ * llvm issue, instead of simply reloading the values from
+ * the passed in pointers if it runs out of registers
+ * it spills/reloads them. Maybe some optimization passes
+ * would help.
+ * Might want to investigate this again later.
+ */
+ const unsigned interp = bld->interp[attrib];
+ LLVMValueRef index = lp_build_const_int32(gallivm,
+ attrib * TGSI_NUM_CHANNELS);
+ LLVMValueRef ptr;
+ LLVMValueRef dadxaos = setup_bld->zero;
+ LLVMValueRef dadyaos = setup_bld->zero;
+ LLVMValueRef a0aos = setup_bld->zero;
+
+ switch (interp) {
+ case LP_INTERP_PERSPECTIVE:
+ /* fall-through */
+
+ case LP_INTERP_LINEAR:
+ ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+ ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+ attrib_name(dadxaos, attrib, 0, ".dadxaos");
+ attrib_name(dadyaos, attrib, 0, ".dadyaos");
+ /* fall-through */
+
+ case LP_INTERP_CONSTANT:
+ case LP_INTERP_FACING:
+ ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ a0aos = LLVMBuildLoad(builder, ptr, "");
+ attrib_name(a0aos, attrib, 0, ".a0aos");
+ break;
+
+ case LP_INTERP_POSITION:
+ /* Nothing to do as the position coeffs are already setup in slot 0 */
+ continue;
+
+ default:
+ assert(0);
+ break;
+ }
+ bld->a0aos[attrib] = a0aos;
+ bld->dadxaos[attrib] = dadxaos;
+ bld->dadyaos[attrib] = dadyaos;
+ }
+}
+
+/**
+ * Interpolate the shader input attribute values.
+ * This is called for each (group of) quad(s).
+ */
+static void
+attribs_update_simple(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
+ int quad_start_index,
+ int start,
+ int end)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *coeff_bld = &bld->coeff_bld;
+ struct lp_build_context *setup_bld = &bld->setup_bld;
+ LLVMValueRef oow = NULL;
+ unsigned attrib, i;
+ LLVMValueRef pixoffx;
+ LLVMValueRef pixoffy;
+ unsigned num_pix = coeff_bld->type.length;
+
+ /* could do this with code-generated passed in pixel offsets */
+ pixoffx = coeff_bld->undef;
+ pixoffy = coeff_bld->undef;
+ for (i = 0; i < coeff_bld->type.length; i++) {
+ LLVMValueRef nr = lp_build_const_int32(gallivm, i);
+ LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
+ (quad_start_index & 1) * 2);
+ LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
+ (quad_start_index & 2));
+ pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
+ pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+ }
+
+ pixoffx = LLVMBuildFAdd(builder, pixoffx,
+ lp_build_broadcast_scalar(coeff_bld, bld->x), "");
+ pixoffy = LLVMBuildFAdd(builder, pixoffy,
+ lp_build_broadcast_scalar(coeff_bld, bld->y), "");
+
+ for (attrib = start; attrib < end; attrib++) {
const unsigned mask = bld->mask[attrib];
const unsigned interp = bld->interp[attrib];
- for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ unsigned chan;
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (mask & (1 << chan)) {
- LLVMValueRef index = lp_build_const_int32(gallivm,
- attrib * TGSI_NUM_CHANNELS + chan);
- LLVMValueRef a0 = zero;
- LLVMValueRef dadx = zero;
- LLVMValueRef dady = zero;
- LLVMValueRef dadxy = zero;
- LLVMValueRef dadq;
- LLVMValueRef dadq2;
- LLVMValueRef a;
+ LLVMValueRef index;
+ LLVMValueRef dadx = coeff_bld->zero;
+ LLVMValueRef dady = coeff_bld->zero;
+ LLVMValueRef a = coeff_bld->zero;
+ index = lp_build_const_int32(gallivm, chan);
switch (interp) {
case LP_INTERP_PERSPECTIVE:
/* fall-through */
case LP_INTERP_LINEAR:
if (attrib == 0 && chan == 0) {
- dadxy = dadx = one;
+ dadx = coeff_bld->one;
}
else if (attrib == 0 && chan == 1) {
- dadxy = dady = one;
+ dady = coeff_bld->one;
}
else {
- dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
- dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
- dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
- attrib_name(dadx, attrib, chan, ".dadx");
- attrib_name(dady, attrib, chan, ".dady");
- attrib_name(dadxy, attrib, chan, ".dadxy");
+ dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, bld->dadxaos[attrib],
+ index);
+ dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, bld->dadyaos[attrib],
+ index);
+ a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, bld->a0aos[attrib],
+ index);
}
- /* fall-through */
+ /*
+ * a = a0 + (x * dadx + y * dady)
+ */
+ dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+ dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+ a = LLVMBuildFAdd(builder, a, dadx, "");
+ a = LLVMBuildFAdd(builder, a, dady, "");
+
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ if (oow == NULL) {
+ LLVMValueRef w = bld->attribs[0][3];
+ assert(attrib != 0);
+ assert(bld->mask[0] & TGSI_WRITEMASK_W);
+ oow = lp_build_rcp(coeff_bld, w);
+ }
+ a = lp_build_mul(coeff_bld, a, oow);
+ }
+ break;
case LP_INTERP_CONSTANT:
case LP_INTERP_FACING:
- a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
- attrib_name(a0, attrib, chan, ".a0");
+ a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, bld->a0aos[attrib],
+ index);
break;
case LP_INTERP_POSITION:
- /* Nothing to do as the position coeffs are already setup in slot 0 */
- continue;
+ assert(attrib > 0);
+ a = bld->attribs[0][chan];
+ break;
default:
assert(0);
break;
}
- /*
- * dadq = {0, dadx, dady, dadx + dady}
- */
+ if ((attrib == 0) && (chan == 2)){
+ /* FIXME: Depth values can exceed 1.0, due to the fact that
+ * setup interpolation coefficients refer to (0,0) which causes
+ * precision loss. So we must clamp to 1.0 here to avoid artifacts
+ */
+ a = lp_build_min(coeff_bld, a, coeff_bld->one);
+ }
+ bld->attribs[attrib][chan] = a;
+ }
+ }
+ }
+}
- dadq = coeff_bld->undef;
- dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
- dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
- dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
- dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
+/**
+ * Initialize the bld->a, dadq fields. This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr)
+{
+ struct lp_build_context *coeff_bld = &bld->coeff_bld;
+ struct lp_build_context *setup_bld = &bld->setup_bld;
+ struct gallivm_state *gallivm = coeff_bld->gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef pixoffx, pixoffy;
+ unsigned attrib;
+ unsigned chan;
+ unsigned i;
+
+ pixoffx = coeff_bld->undef;
+ pixoffy = coeff_bld->undef;
+ for (i = 0; i < coeff_bld->type.length; i++) {
+ LLVMValueRef nr = lp_build_const_int32(gallivm, i);
+ LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
+ LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
+ pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
+ pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+ }
- /*
- * dadq2 = 2 * dq
- */
- dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
+ for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ const unsigned mask = bld->mask[attrib];
+ const unsigned interp = bld->interp[attrib];
+ LLVMValueRef index = lp_build_const_int32(gallivm,
+ attrib * TGSI_NUM_CHANNELS);
+ LLVMValueRef ptr;
+ LLVMValueRef dadxaos = setup_bld->zero;
+ LLVMValueRef dadyaos = setup_bld->zero;
+ LLVMValueRef a0aos = setup_bld->zero;
+
+ /* always fetch all 4 values for performance/simplicity */
+ switch (interp) {
+ case LP_INTERP_PERSPECTIVE:
+ /* fall-through */
+
+ case LP_INTERP_LINEAR:
+ ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+ ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+ attrib_name(dadxaos, attrib, 0, ".dadxaos");
+ attrib_name(dadyaos, attrib, 0, ".dadyaos");
+ /* fall-through */
+
+ case LP_INTERP_CONSTANT:
+ case LP_INTERP_FACING:
+ ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+ ptr = LLVMBuildBitCast(builder, ptr,
+ LLVMPointerType(setup_bld->vec_type, 0), "");
+ a0aos = LLVMBuildLoad(builder, ptr, "");
+ attrib_name(a0aos, attrib, 0, ".a0aos");
+ break;
+
+ case LP_INTERP_POSITION:
+ /* Nothing to do as the position coeffs are already setup in slot 0 */
+ continue;
+
+ default:
+ assert(0);
+ break;
+ }
- /*
- * a = a0 + (x * dadx + y * dady)
- */
+ /*
+ * a = a0 + (x * dadx + y * dady)
+ * a0aos is the attrib value at top left corner of stamp
+ */
+ if (interp != LP_INTERP_CONSTANT &&
+ interp != LP_INTERP_FACING) {
+ LLVMValueRef axaos, ayaos;
+ axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
+ dadxaos, "");
+ ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
+ dadyaos, "");
+ a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
+ a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
+ }
+
+ /*
+ * dadq = {0, dadx, dady, dadx + dady}
+ * for two quads (side by side) this is:
+ * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
+ */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ /* this generates a CRAPLOAD of shuffles... */
+ if (mask & (1 << chan)) {
+ LLVMValueRef dadx, dady;
+ LLVMValueRef dadq, dadq2;
+ LLVMValueRef a;
+ LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);
if (attrib == 0 && chan == 0) {
- a = bld->x;
+ a = lp_build_broadcast_scalar(coeff_bld, bld->x);
+ dadx = coeff_bld->one;
+ dady = coeff_bld->zero;
}
else if (attrib == 0 && chan == 1) {
- a = bld->y;
+ a = lp_build_broadcast_scalar(coeff_bld, bld->y);
+ dady = coeff_bld->one;
+ dadx = coeff_bld->zero;
}
else {
- a = a0;
- if (interp != LP_INTERP_CONSTANT &&
- interp != LP_INTERP_FACING) {
- LLVMValueRef ax, ay, axy;
- ax = LLVMBuildFMul(builder, bld->x, dadx, "");
- ay = LLVMBuildFMul(builder, bld->y, dady, "");
- axy = LLVMBuildFAdd(builder, ax, ay, "");
- a = LLVMBuildFAdd(builder, a, axy, "");
- }
- }
+ dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, dadxaos, chan_index);
+ dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, dadyaos, chan_index);
- /*
- * a = {a, a, a, a}
- */
+ /*
+ * a = {a, a, a, a}
+ */
+ a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+ coeff_bld->type, a0aos, chan_index);
+ }
- a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
+ dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+ dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+ dadq = LLVMBuildFAdd(builder, dadx, dady, "");
/*
- * Compute the attrib values on the upper-left corner of each quad.
+ * Compute the attrib values on the upper-left corner of each
+ * group of quads.
+ * Note that if we process 2 quads at once this doesn't
+ * really do exactly what we want.
+ * We need to access elem 0 and 2 respectively later if we process
+ * 2 quads at once.
*/
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
+ dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
a = LLVMBuildFAdd(builder, a, dadq2, "");
}
@@ -249,6 +477,12 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* a *= 1 / w
*/
+ /*
+ * XXX since we're only going to access elements 0,2 out of 8
+ * if we have 8-wide vectors we should do the division only 4-wide.
+ * a is really 2 elements in a 4-wide vector disguised as 8-wide
+ * in this case.
+ */
if (interp == LP_INTERP_PERSPECTIVE) {
LLVMValueRef w = bld->a[0][3];
assert(attrib != 0);
@@ -279,18 +513,18 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
static void
attribs_update(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
- int quad_index,
+ int quad_start_index,
int start,
int end)
{
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *coeff_bld = &bld->coeff_bld;
- LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
+ LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index);
LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
- assert(quad_index < 4);
+ assert(quad_start_index < 4);
for(attrib = start; attrib < end; ++attrib) {
const unsigned mask = bld->mask[attrib];
@@ -412,6 +646,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef y0)
{
struct lp_type coeff_type;
+ struct lp_type setup_type;
unsigned attrib;
unsigned chan;
@@ -421,19 +656,26 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
coeff_type.floating = TRUE;
coeff_type.sign = TRUE;
coeff_type.width = 32;
- coeff_type.length = TGSI_QUAD_SIZE;
+ coeff_type.length = type.length;
+
+ memset(&setup_type, 0, sizeof setup_type);
+ setup_type.floating = TRUE;
+ setup_type.sign = TRUE;
+ setup_type.width = 32;
+ setup_type.length = TGSI_NUM_CHANNELS;
+
/* XXX: we don't support interpolating into any other types */
assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
+ lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
/* For convenience */
bld->pos = bld->attribs[0];
bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
/* Position */
- bld->num_attribs = 1;
bld->mask[0] = TGSI_WRITEMASK_XYZW;
bld->interp[0] = LP_INTERP_LINEAR;
@@ -453,7 +695,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
- coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+ if (coeff_type.length > 4) {
+ coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
+ }
+ else {
+ coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+ }
}
@@ -463,20 +710,30 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
void
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
- int quad_index)
+ int quad_start_index)
{
- assert(quad_index < 4);
+ assert(quad_start_index < 4);
- attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
+ if (bld->coeff_bld.type.length > 4) {
+ attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+ }
+ else {
+ attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+ }
}
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
- int quad_index)
+ int quad_start_index)
{
- assert(quad_index < 4);
+ assert(quad_start_index < 4);
- attribs_update(bld, gallivm, quad_index, 0, 1);
+ if (bld->coeff_bld.type.length > 4) {
+ attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
+ }
+ else {
+ attribs_update(bld, gallivm, quad_start_index, 0, 1);
+ }
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 6970a9b8c2c..f293b582318 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -79,6 +79,7 @@ struct lp_build_interp_soa_context
{
/* TGSI_QUAD_SIZE x float */
struct lp_build_context coeff_bld;
+ struct lp_build_context setup_bld;
unsigned num_attribs;
unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
@@ -87,8 +88,11 @@ struct lp_build_interp_soa_context
LLVMValueRef x;
LLVMValueRef y;
- LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+ LLVMValueRef a[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+ LLVMValueRef a0aos[1 + PIPE_MAX_SHADER_INPUTS];
+ LLVMValueRef dadxaos[1 + PIPE_MAX_SHADER_INPUTS];
+ LLVMValueRef dadyaos[1 + PIPE_MAX_SHADER_INPUTS];
LLVMValueRef oow;
@@ -118,12 +122,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
void
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
- int quad_index);
+ int quad_start_index);
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
- int quad_index);
+ int quad__start_index);
#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 9e4c7d6734e..07cea9158c3 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -51,42 +51,6 @@
unsigned llvmpipe_variant_count;
-/**
- * This function is called by the gallivm "garbage collector" when
- * the LLVM global data structures are freed. We must free all LLVM-related
- * data. Specifically, all JIT'd shader variants.
- */
-static void
-garbage_collect_callback(void *cb_data)
-{
- struct llvmpipe_context *lp = (struct llvmpipe_context *) cb_data;
- struct lp_fs_variant_list_item *li;
-
- /* Free all the context's shader variants */
- li = first_elem(&lp->fs_variants_list);
- while (!at_end(&lp->fs_variants_list, li)) {
- struct lp_fs_variant_list_item *next = next_elem(li);
- llvmpipe_remove_shader_variant(lp, li->base);
- li = next;
- }
-
- /* Free all the context's primitive setup variants */
- lp_delete_setup_variants(lp);
-
- /* release references to setup variants, shaders */
- lp_setup_set_setup_variant(lp->setup, NULL);
- lp_setup_set_fs_variant(lp->setup, NULL);
- lp_setup_reset(lp->setup);
-
- /* This type will be recreated upon demand */
- lp->jit_context_ptr_type = NULL;
-
- /* mark all state as dirty to ensure new shaders are jit'd, etc. */
- lp->dirty = ~0;
-}
-
-
-
static void llvmpipe_destroy( struct pipe_context *pipe )
{
struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
@@ -94,9 +58,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
lp_print_counters();
- gallivm_remove_garbage_collector_callback(garbage_collect_callback,
- llvmpipe);
-
/* This will also destroy llvmpipe->setup:
*/
if (llvmpipe->draw)
@@ -128,8 +89,6 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
lp_delete_setup_variants(llvmpipe);
- gallivm_destroy(llvmpipe->gallivm);
-
align_free( llvmpipe );
}
@@ -195,12 +154,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
llvmpipe_init_context_resource_funcs( &llvmpipe->pipe );
llvmpipe_init_surface_functions(llvmpipe);
- llvmpipe->gallivm = gallivm_create();
-
/*
* Create drawing context and plug our rendering stage into it.
*/
- llvmpipe->draw = draw_create_gallivm(&llvmpipe->pipe, llvmpipe->gallivm);
+ llvmpipe->draw = draw_create(&llvmpipe->pipe);
if (!llvmpipe->draw)
goto fail;
@@ -226,9 +183,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
lp_reset_counters();
- gallivm_register_garbage_collector_callback(garbage_collect_callback,
- llvmpipe);
-
return &llvmpipe->pipe;
fail:
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index d4750705b43..d0220e188cf 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -131,10 +131,6 @@ struct llvmpipe_context {
unsigned nr_fs_variants;
unsigned nr_fs_instrs;
- /** JIT code generation */
- struct gallivm_state *gallivm;
- LLVMTypeRef jit_context_ptr_type;
-
struct lp_setup_variant_list_item setup_variants_list;
unsigned nr_setup_variants;
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index 42430550ea6..964b792b739 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -54,13 +54,6 @@ llvmpipe_flush( struct pipe_context *pipe,
/* ask the setup module to flush */
lp_setup_flush(llvmpipe->setup, fence, reason);
-
- if (llvmpipe_variant_count > 1000) {
- /* time to do a garbage collection */
- gallivm_garbage_collect(llvmpipe->gallivm);
- llvmpipe_variant_count = 0;
- }
-
/* Enable to dump BMPs of the color/depth buffers each frame */
if (0) {
static unsigned frame_no = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index eb1db84e4b8..7a85eab41a0 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -41,7 +41,7 @@
static void
-lp_jit_create_types(struct llvmpipe_context *lp)
+lp_jit_create_types(struct lp_fragment_shader_variant *lp)
{
struct gallivm_state *gallivm = lp->gallivm;
LLVMContextRef lc = gallivm->context;
@@ -183,11 +183,9 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
}
-LLVMTypeRef
-lp_jit_get_context_type(struct llvmpipe_context *lp)
+void
+lp_jit_init_types(struct lp_fragment_shader_variant *lp)
{
if (!lp->jit_context_ptr_type)
lp_jit_create_types(lp);
-
- return lp->jit_context_ptr_type;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 04e8dd5267b..584d2c8fd81 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -42,6 +42,7 @@
#include "lp_texture.h"
+struct lp_fragment_shader_variant;
struct llvmpipe_screen;
@@ -164,8 +165,8 @@ void
lp_jit_screen_init(struct llvmpipe_screen *screen);
-LLVMTypeRef
-lp_jit_get_context_type(struct llvmpipe_context *lp);
+void
+lp_jit_init_types(struct lp_fragment_shader_variant *lp);
#endif /* LP_JIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
index 0f55d4a80ae..85f73e54ac4 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.c
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -36,10 +36,12 @@
* number of threads or using a smaller tilesize when multiple
* colorbuffers are bound.
*/
-PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
/* A single dummy tile used in a couple of out-of-memory situations.
*/
-PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
index f7418f5e087..5552c2908e1 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.h
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -32,9 +32,12 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "lp_limits.h"
+#include "gallivm/lp_bld_type.h"
-extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
-extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
#endif /* LP_MEMORY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 09af0274d7a..d743d7689ae 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -42,6 +42,7 @@
#include "lp_tile_soa.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_scene.h"
+#include "lp_tex_sample.h"
#ifdef DEBUG
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 03d15f6e2b0..54f45357fdc 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -97,56 +97,56 @@
#include "lp_state_fs.h"
-#include <llvm-c/Analysis.h>
-#include <llvm-c/BitWriter.h>
-
-
/** Fragment shader number (for debugging) */
static unsigned fs_no = 0;
/**
- * Expand the relevent bits of mask_input to a 4-dword mask for the
- * four pixels in a 2x2 quad. This will set the four elements of the
+ * Expand the relevant bits of mask_input to a n*4-dword mask for the
+ * n*four pixels in n 2x2 quads. This will set the n*four elements of the
* quad mask vector to 0 or ~0.
+ * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid
+ * quad arguments with fs length 8.
*
- * \param quad which quad of the quad group to test, in [0,3]
+ * \param first_quad which quad(s) of the quad group to test, in [0,3]
* \param mask_input bitwise mask for the whole 4x4 stamp
*/
static LLVMValueRef
generate_quad_mask(struct gallivm_state *gallivm,
struct lp_type fs_type,
- unsigned quad,
+ unsigned first_quad,
LLVMValueRef mask_input) /* int32 */
{
LLVMBuilderRef builder = gallivm->builder;
struct lp_type mask_type;
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
- LLVMValueRef bits[4];
+ LLVMValueRef bits[16];
LLVMValueRef mask;
- int shift;
+ int shift, i;
/*
* XXX: We'll need a different path for 16 x u8
*/
assert(fs_type.width == 32);
- assert(fs_type.length == 4);
+ assert(fs_type.length <= Elements(bits));
mask_type = lp_int_type(fs_type);
/*
* mask_input >>= (quad * 4)
*/
- switch (quad) {
+ switch (first_quad) {
case 0:
shift = 0;
break;
case 1:
+ assert(fs_type.length == 4);
shift = 2;
break;
case 2:
shift = 8;
break;
case 3:
+ assert(fs_type.length == 4);
shift = 10;
break;
default:
@@ -166,12 +166,14 @@ generate_quad_mask(struct gallivm_state *gallivm,
lp_build_vec_type(gallivm, mask_type),
mask_input);
- bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
- bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
- bits[2] = LLVMConstInt(i32t, 1 << 4, 0);
- bits[3] = LLVMConstInt(i32t, 1 << 5, 0);
-
- mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
+ for (i = 0; i < fs_type.length / 4; i++) {
+ unsigned j = 2 * (i % 2) + (i / 2) * 8;
+ bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0);
+ bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0);
+ bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0);
+ bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0);
+ }
+ mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
/*
* mask = mask != 0 ? ~0 : 0
@@ -300,7 +302,7 @@ generate_fs(struct gallivm_state *gallivm,
/* do triangle edge testing */
if (partial_mask) {
*pmask = generate_quad_mask(gallivm, type,
- i, mask_input);
+ i*type.length/4, mask_input);
}
else {
*pmask = lp_build_const_int_vec(gallivm, type, ~0);
@@ -312,7 +314,7 @@ generate_fs(struct gallivm_state *gallivm,
if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
lp_build_mask_check(&mask);
- lp_build_interp_soa_update_pos(interp, gallivm, i);
+ lp_build_interp_soa_update_pos(interp, gallivm, i*type.length/4);
z = interp->pos[2];
if (depth_mode & EARLY_DEPTH_TEST) {
@@ -333,7 +335,7 @@ generate_fs(struct gallivm_state *gallivm,
}
}
- lp_build_interp_soa_update_inputs(interp, gallivm, i);
+ lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4);
/* Build the actual shader */
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
@@ -515,7 +517,7 @@ generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant,
unsigned partial_mask)
{
- struct gallivm_state *gallivm = lp->gallivm;
+ struct gallivm_state *gallivm = variant->gallivm;
const struct lp_fragment_shader_variant_key *key = &variant->key;
struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
char func_name[256];
@@ -541,8 +543,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
struct lp_build_interp_soa_context interp;
- LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef fs_mask[16 / 4];
+ LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
LLVMValueRef blend_mask;
LLVMValueRef function;
LLVMValueRef facing;
@@ -553,6 +555,8 @@ generate_fragment(struct llvmpipe_context *lp,
unsigned cbuf;
boolean cbuf0_write_all;
+ assert(lp_native_vector_width / 32 >= 4);
+
/* Adjust color input interpolation according to flatshade state:
*/
memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
@@ -579,12 +583,12 @@ generate_fragment(struct llvmpipe_context *lp,
* characteristics. */
memset(&fs_type, 0, sizeof fs_type);
- fs_type.floating = TRUE; /* floating point values */
- fs_type.sign = TRUE; /* values are signed */
- fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
- fs_type.width = 32; /* 32-bit float */
- fs_type.length = 4; /* 4 elements per vector */
- num_fs = 4; /* number of quads per block */
+ fs_type.floating = TRUE; /* floating point values */
+ fs_type.sign = TRUE; /* values are signed */
+ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
+ fs_type.width = 32; /* 32-bit float */
+ fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
+ num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
@@ -605,7 +609,7 @@ generate_fragment(struct llvmpipe_context *lp,
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
shader->no, variant->no, partial_mask ? "partial" : "whole");
- arg_types[0] = lp_jit_get_context_type(lp); /* context */
+ arg_types[0] = variant->jit_context_ptr_type; /* context */
arg_types[1] = int32_type; /* x */
arg_types[2] = int32_type; /* y */
arg_types[3] = int32_type; /* facing */
@@ -738,20 +742,20 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
}
- lp_build_conv(gallivm, fs_type, blend_type,
+ lp_build_conv(gallivm, fs_type, blend_type,
fs_color_vals,
num_fs,
- &blend_in_color[chan], 1);
+ &blend_in_color[chan], 1);
- lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
+ lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
if (partial_mask || !variant->opaque) {
- lp_build_conv_mask(lp->gallivm, fs_type, blend_type,
+ lp_build_conv_mask(variant->gallivm, fs_type, blend_type,
fs_mask, num_fs,
&blend_mask, 1);
} else {
- blend_mask = lp_build_const_int_vec(lp->gallivm, blend_type, ~0);
+ blend_mask = lp_build_const_int_vec(variant->gallivm, blend_type, ~0);
}
color_ptr = LLVMBuildLoad(builder,
@@ -772,7 +776,7 @@ generate_fragment(struct llvmpipe_context *lp,
!key->alpha.enabled &&
!shader->info.base.uses_kill);
- generate_blend(lp->gallivm,
+ generate_blend(variant->gallivm,
&key->blend,
rt,
builder,
@@ -787,43 +791,9 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuildRetVoid(builder);
- /* Verify the LLVM IR. If invalid, dump and abort */
-#ifdef DEBUG
- if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
- if (1)
- lp_debug_dump_value(function);
- abort();
- }
-#endif
-
- /* Apply optimizations to LLVM IR */
- LLVMRunFunctionPassManager(gallivm->passmgr, function);
-
- if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) {
- /* Print the LLVM IR to stderr */
- lp_debug_dump_value(function);
- debug_printf("\n");
- }
-
- /* Dump byte code to a file */
- if (0) {
- LLVMWriteBitcodeToFile(gallivm->module, "llvmpipe.bc");
- }
+ gallivm_verify_function(gallivm, function);
variant->nr_instrs += lp_build_count_instructions(function);
- /*
- * Translate the LLVM IR into machine code.
- */
- {
- void *f = LLVMGetPointerToGlobal(gallivm->engine, function);
-
- variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
-
- if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) {
- lp_disassemble(f);
- }
- lp_func_delete_body(function);
- }
}
@@ -937,6 +907,12 @@ generate_variant(struct llvmpipe_context *lp,
if(!variant)
return NULL;
+ variant->gallivm = gallivm_create();
+ if (!variant->gallivm) {
+ FREE(variant);
+ return NULL;
+ }
+
variant->shader = shader;
variant->list_item_global.base = variant;
variant->list_item_local.base = variant;
@@ -968,12 +944,35 @@ generate_variant(struct llvmpipe_context *lp,
lp_debug_fs_variant(variant);
}
- generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+ lp_jit_init_types(variant);
+
+ if (variant->jit_function[RAST_EDGE_TEST] == NULL)
+ generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+
+ if (variant->jit_function[RAST_WHOLE] == NULL) {
+ if (variant->opaque) {
+ /* Specialized shader, which doesn't need to read the color buffer. */
+ generate_fragment(lp, shader, variant, RAST_WHOLE);
+ }
+ }
+
+ /*
+ * Compile everything
+ */
+
+ gallivm_compile_module(variant->gallivm);
+
+ if (variant->function[RAST_EDGE_TEST]) {
+ variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
+ gallivm_jit_function(variant->gallivm,
+ variant->function[RAST_EDGE_TEST]);
+ }
- if (variant->opaque) {
- /* Specialized shader, which doesn't need to read the color buffer. */
- generate_fragment(lp, shader, variant, RAST_WHOLE);
- } else {
+ if (variant->function[RAST_WHOLE]) {
+ variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func)
+ gallivm_jit_function(variant->gallivm,
+ variant->function[RAST_WHOLE]);
+ } else if (!variant->jit_function[RAST_WHOLE]) {
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
@@ -1116,13 +1115,14 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
/* free all the variant's JIT'd functions */
for (i = 0; i < Elements(variant->function); i++) {
if (variant->function[i]) {
- if (variant->jit_function[i])
- LLVMFreeMachineCodeForFunction(lp->gallivm->engine,
- variant->function[i]);
- LLVMDeleteFunction(variant->function[i]);
+ gallivm_free_function(variant->gallivm,
+ variant->function[i],
+ variant->jit_function[i]);
}
}
+ gallivm_destroy(variant->gallivm);
+
/* remove from shader's list */
remove_from_list(&variant->list_item_local);
variant->shader->variants_cached--;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 273d241d8fc..306f5f9669a 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -84,6 +84,12 @@ struct lp_fragment_shader_variant
boolean opaque;
+ struct gallivm_state *gallivm;
+
+ LLVMTypeRef jit_context_ptr_type;
+ LLVMTypeRef jit_thread_data_ptr_type;
+ LLVMTypeRef jit_linear_context_ptr_type;
+
LLVMValueRef function[2];
lp_jit_frag_func jit_function[2];
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 299c1ef85dc..1d5e50be9b7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -38,7 +38,6 @@
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_type.h"
-#include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
#include "lp_perf.h"
#include "lp_debug.h"
@@ -77,12 +76,6 @@ struct lp_setup_args
LLVMValueRef dy01_ooa;
LLVMValueRef dx20_ooa;
LLVMValueRef dx01_ooa;
-
- /* Temporary, per-attribute:
- */
- LLVMValueRef v0a;
- LLVMValueRef v1a;
- LLVMValueRef v2a;
};
@@ -146,7 +139,7 @@ store_coef(struct gallivm_state *gallivm,
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
-
+
LLVMBuildStore(builder,
a0,
LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
@@ -210,27 +203,13 @@ vert_attrib(struct gallivm_state *gallivm,
return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
}
-static LLVMValueRef
-vert_clamp(LLVMBuilderRef b,
- LLVMValueRef x,
- LLVMValueRef min,
- LLVMValueRef max)
-{
- LLVMValueRef min_result = LLVMBuildFCmp(b, LLVMRealUGT, min, x, "");
- LLVMValueRef max_result = LLVMBuildFCmp(b, LLVMRealUGT, x, max, "");
- LLVMValueRef clamp_value;
-
- clamp_value = LLVMBuildSelect(b, min_result, min, x, "");
- clamp_value = LLVMBuildSelect(b, max_result, max, x, "");
-
- return clamp_value;
-}
static void
lp_twoside(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key,
- int bcolor_slot)
+ int bcolor_slot,
+ LLVMValueRef attribv[3])
{
LLVMBuilderRef b = gallivm->builder;
LLVMValueRef a0_back, a1_back, a2_back;
@@ -248,67 +227,66 @@ lp_twoside(struct gallivm_state *gallivm,
* Prefer select to if so we don't have to worry about phis or
* allocas.
*/
- args->v0a = LLVMBuildSelect(b, front_facing, a0_back, args->v0a, "");
- args->v1a = LLVMBuildSelect(b, front_facing, a1_back, args->v1a, "");
- args->v2a = LLVMBuildSelect(b, front_facing, a2_back, args->v2a, "");
+ attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
+ attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
+ attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
}
static void
lp_do_offset_tri(struct gallivm_state *gallivm,
struct lp_setup_args *args,
- const struct lp_setup_variant_key *key)
+ const struct lp_setup_variant_key *key,
+ LLVMValueRef inv_det,
+ LLVMValueRef dxyz01,
+ LLVMValueRef dxyz20,
+ LLVMValueRef attribv[3])
{
LLVMBuilderRef b = gallivm->builder;
struct lp_build_context bld;
LLVMValueRef zoffset, mult;
LLVMValueRef z0_new, z1_new, z2_new;
- LLVMValueRef dzdx0, dzdx, dzdy0, dzdy;
- LLVMValueRef max, max_value;
-
- LLVMValueRef one = lp_build_const_float(gallivm, 1.0);
- LLVMValueRef zero = lp_build_const_float(gallivm, 0.0);
- LLVMValueRef two = lp_build_const_int32(gallivm, 2);
-
- /* edge vectors: e = v0 - v2, f = v1 - v2 */
- LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x");
- LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x");
- LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x");
- LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y");
- LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y");
- LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y");
- LLVMValueRef v0_z = vert_attrib(gallivm, args->v0, 0, 2, "v0_z");
- LLVMValueRef v1_z = vert_attrib(gallivm, args->v1, 0, 2, "v1_z");
- LLVMValueRef v2_z = vert_attrib(gallivm, args->v2, 0, 2, "v2_z");
-
- /* edge vectors: e = v0 - v2, f = v1 - v2 */
- LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02");
- LLVMValueRef dy02 = LLVMBuildFSub(b, v0_y, v2_y, "dy02");
- LLVMValueRef dz02 = LLVMBuildFSub(b, v0_z, v2_z, "dz02");
- LLVMValueRef dx12 = LLVMBuildFSub(b, v1_x, v2_x, "dx12");
- LLVMValueRef dy12 = LLVMBuildFSub(b, v1_y, v2_y, "dy12");
- LLVMValueRef dz12 = LLVMBuildFSub(b, v1_z, v2_z, "dz12");
-
- /* det = cross(e,f).z */
- LLVMValueRef dx02_dy12 = LLVMBuildFMul(b, dx02, dy12, "dx02_dy12");
- LLVMValueRef dy02_dx12 = LLVMBuildFMul(b, dy02, dx12, "dy02_dx12");
- LLVMValueRef det = LLVMBuildFSub(b, dx02_dy12, dy02_dx12, "det");
- LLVMValueRef inv_det = LLVMBuildFDiv(b, one, det, "inv_det");
-
- /* (res1,res2) = cross(e,f).xy */
- LLVMValueRef dy02_dz12 = LLVMBuildFMul(b, dy02, dz12, "dy02_dz12");
- LLVMValueRef dz02_dy12 = LLVMBuildFMul(b, dz02, dy12, "dz02_dy12");
- LLVMValueRef dz02_dx12 = LLVMBuildFMul(b, dz02, dx12, "dz02_dx12");
- LLVMValueRef dx02_dz12 = LLVMBuildFMul(b, dx02, dz12, "dx02_dz12");
- LLVMValueRef res1 = LLVMBuildFSub(b, dy02_dz12, dz02_dy12, "res1");
- LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2");
+ LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
+ LLVMValueRef z0z1, z0z1z2;
+ LLVMValueRef max, max_value, res12;
+ LLVMValueRef shuffles[4];
+ LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
+ LLVMValueRef threei = lp_build_const_int32(gallivm, 3);
+
+ /* (res12) = cross(e,f).xy */
+ shuffles[0] = twoi;
+ shuffles[1] = zeroi;
+ shuffles[2] = onei;
+ shuffles[3] = twoi;
+ dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
+
+ shuffles[0] = onei;
+ shuffles[1] = twoi;
+ shuffles[2] = twoi;
+ shuffles[3] = zeroi;
+ dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
+
+ dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
+
+ shuffles[0] = twoi;
+ shuffles[1] = threei;
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
+ LLVMConstVector(shuffles, 4), "");
+
+ res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
/* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
- lp_build_context_init(&bld, gallivm, lp_type_float(32));
- dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx");
- dzdx = lp_build_abs(&bld, dzdx0);
- dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy");
- dzdy = lp_build_abs(&bld, dzdy0);
+ lp_build_context_init(&bld, gallivm, lp_type_float_vec(32, 128));
+ dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
+ dzdxdzdy = lp_build_abs(&bld, dzdxdzdy);
+
+ dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
+ dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
/* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
@@ -317,45 +295,56 @@ lp_do_offset_tri(struct gallivm_state *gallivm,
mult = LLVMBuildFMul(b, max_value, lp_build_const_float(gallivm, key->scale), "");
zoffset = LLVMBuildFAdd(b, lp_build_const_float(gallivm, key->units), mult, "zoffset");
+ /* yuck */
+ shuffles[0] = twoi;
+ shuffles[1] = lp_build_const_int32(gallivm, 6);
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
+ shuffles[0] = zeroi;
+ shuffles[1] = onei;
+ shuffles[2] = lp_build_const_int32(gallivm, 6);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
+ zoffset = vec4f_from_scalar(gallivm, zoffset, "");
+
/* clamp and do offset */
- z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one);
- z1_new = vert_clamp(b, LLVMBuildFAdd(b, v1_z, zoffset, ""), zero, one);
- z2_new = vert_clamp(b, LLVMBuildFAdd(b, v2_z, zoffset, ""), zero, one);
+ z0z1z2 = lp_build_clamp(&bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld.zero, bld.one);
/* insert into args->a0.z, a1.z, a2.z:
- */
- args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, two, "");
- args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, two, "");
- args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, two, "");
+ */
+ z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
+ z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
+ z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
+ attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
+ attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
+ attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
}
static void
load_attribute(struct gallivm_state *gallivm,
struct lp_setup_args *args,
const struct lp_setup_variant_key *key,
- unsigned vert_attr)
+ unsigned vert_attr,
+ LLVMValueRef attribv[3])
{
LLVMBuilderRef b = gallivm->builder;
LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
/* Load the vertex data
*/
- args->v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
- args->v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
- args->v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
+ attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
+ attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
+ attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
- /* Potentially modify it according to twoside, offset, etc:
+ /* Potentially modify it according to twoside, etc:
*/
- if (vert_attr == 0 && (key->scale != 0.0f || key->units != 0.0f)) {
- lp_do_offset_tri(gallivm, args, key);
- }
-
if (key->twoside) {
if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
- lp_twoside(gallivm, args, key, key->bcolor_slot);
+ lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
- lp_twoside(gallivm, args, key, key->bspec_slot);
+ lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
}
}
@@ -375,8 +364,6 @@ emit_coef4( struct gallivm_state *gallivm,
LLVMValueRef x0_center = args->x0_center;
LLVMValueRef y0_center = args->y0_center;
- /* XXX: using fsub, fmul on vector types -- does this work??
- */
LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
@@ -406,14 +393,15 @@ emit_coef4( struct gallivm_state *gallivm,
static void
emit_linear_coef( struct gallivm_state *gallivm,
struct lp_setup_args *args,
- unsigned slot)
+ unsigned slot,
+ LLVMValueRef attribv[3])
{
/* nothing to do anymore */
emit_coef4(gallivm,
args, slot,
- args->v0a,
- args->v1a,
- args->v2a);
+ attribv[0],
+ attribv[1],
+ attribv[2]);
}
@@ -426,9 +414,10 @@ emit_linear_coef( struct gallivm_state *gallivm,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void
-emit_perspective_coef( struct gallivm_state *gallivm,
- struct lp_setup_args *args,
- unsigned slot)
+apply_perspective_corr( struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef attribv[3])
{
LLVMBuilderRef b = gallivm->builder;
@@ -438,20 +427,19 @@ emit_perspective_coef( struct gallivm_state *gallivm,
LLVMValueRef v1_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v1, 0, 3, ""), "v1_oow");
LLVMValueRef v2_oow = vec4f_from_scalar(gallivm, vert_attrib(gallivm, args->v2, 0, 3, ""), "v2_oow");
- LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, args->v0a, v0_oow, "v0_oow_v0a");
- LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, args->v1a, v1_oow, "v1_oow_v1a");
- LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, args->v2a, v2_oow, "v2_oow_v2a");
-
- emit_coef4(gallivm, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
+ attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
+ attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
+ attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
}
static void
emit_position_coef( struct gallivm_state *gallivm,
struct lp_setup_args *args,
- int slot )
+ int slot,
+ LLVMValueRef attribv[3])
{
- emit_linear_coef(gallivm, args, slot);
+ emit_linear_coef(gallivm, args, slot, attribv);
}
@@ -464,7 +452,9 @@ emit_position_coef( struct gallivm_state *gallivm,
static void
emit_apply_cyl_wrap(struct gallivm_state *gallivm,
struct lp_setup_args *args,
- uint cyl_wrap)
+ uint cyl_wrap,
+ LLVMValueRef attribv[3])
+
{
LLVMBuilderRef builder = gallivm->builder;
struct lp_type type = lp_float32_vec4_type();
@@ -489,43 +479,43 @@ emit_apply_cyl_wrap(struct gallivm_state *gallivm,
one = LLVMBuildAnd(builder, one, cyl_mask, "");
/* Edge v0 -> v1 */
- delta = LLVMBuildFSub(builder, args->v1a, args->v0a, "");
+ delta = LLVMBuildFSub(builder, attribv[1], attribv[0], "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v0a = LLVMBuildFAdd(builder, args->v0a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v1a = LLVMBuildFAdd(builder, args->v1a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
/* Edge v1 -> v2 */
- delta = LLVMBuildFSub(builder, args->v2a, args->v1a, "");
+ delta = LLVMBuildFSub(builder, attribv[2], attribv[1], "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v1a = LLVMBuildFAdd(builder, args->v1a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v2a = LLVMBuildFAdd(builder, args->v2a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
/* Edge v2 -> v0 */
- delta = LLVMBuildFSub(builder, args->v0a, args->v2a, "");
+ delta = LLVMBuildFSub(builder, attribv[0], attribv[2], "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v2a = LLVMBuildFAdd(builder, args->v2a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
- offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
- offset = LLVMBuildAnd(builder, offset, one, "");
- offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
- args->v0a = LLVMBuildFAdd(builder, args->v0a, offset, "");
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
}
@@ -534,43 +524,38 @@ emit_apply_cyl_wrap(struct gallivm_state *gallivm,
*/
static void
emit_tri_coef( struct gallivm_state *gallivm,
- const struct lp_setup_variant_key *key,
- struct lp_setup_args *args )
+ const struct lp_setup_variant_key *key,
+ struct lp_setup_args *args)
{
unsigned slot;
- /* The internal position input is in slot zero:
- */
- load_attribute(gallivm, args, key, 0);
- emit_position_coef(gallivm, args, 0);
+ LLVMValueRef attribs[3];
- /* setup interpolation for all the remaining attributes:
+ /* setup interpolation for all the remaining attributes:
*/
for (slot = 0; slot < key->num_inputs; slot++) {
-
- if (key->inputs[slot].interp == LP_INTERP_CONSTANT ||
- key->inputs[slot].interp == LP_INTERP_LINEAR ||
- key->inputs[slot].interp == LP_INTERP_PERSPECTIVE)
- load_attribute(gallivm, args, key, key->inputs[slot].src_index);
-
switch (key->inputs[slot].interp) {
case LP_INTERP_CONSTANT:
- if (key->flatshade_first) {
- emit_constant_coef4(gallivm, args, slot+1, args->v0a);
- }
- else {
- emit_constant_coef4(gallivm, args, slot+1, args->v2a);
- }
- break;
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ if (key->flatshade_first) {
+ emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
+ }
+ else {
+ emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
+ }
+ break;
case LP_INTERP_LINEAR:
- emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap);
- emit_linear_coef(gallivm, args, slot+1);
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
+ emit_linear_coef(gallivm, args, slot+1, attribs);
break;
case LP_INTERP_PERSPECTIVE:
- emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap);
- emit_perspective_coef(gallivm, args, slot+1);
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
+ apply_perspective_corr(gallivm, args, slot+1, attribs);
+ emit_linear_coef(gallivm, args, slot+1, attribs);
break;
case LP_INTERP_POSITION:
@@ -591,62 +576,6 @@ emit_tri_coef( struct gallivm_state *gallivm,
}
-/* XXX: This is generic code, share with fs/vs codegen:
- */
-static lp_jit_setup_triangle
-finalize_function(struct gallivm_state *gallivm,
- LLVMBuilderRef builder,
- LLVMValueRef function)
-{
- void *f;
-
- /* Verify the LLVM IR. If invalid, dump and abort */
-#ifdef DEBUG
- if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
- if (1)
- lp_debug_dump_value(function);
- abort();
- }
-#endif
-
- /* Apply optimizations to LLVM IR */
- LLVMRunFunctionPassManager(gallivm->passmgr, function);
-
- if (gallivm_debug & GALLIVM_DEBUG_IR)
- {
- /* Print the LLVM IR to stderr */
- lp_debug_dump_value(function);
- debug_printf("\n");
- }
-
- /*
- * Translate the LLVM IR into machine code.
- */
- f = LLVMGetPointerToGlobal(gallivm->engine, function);
-
- if (gallivm_debug & GALLIVM_DEBUG_ASM)
- {
- lp_disassemble(f);
- }
-
- lp_func_delete_body(function);
-
- return (lp_jit_setup_triangle) pointer_to_func(f);
-}
-
-/* XXX: Generic code:
- */
-static void
-lp_emit_emms(struct gallivm_state *gallivm)
-{
-#ifdef PIPE_ARCH_X86
- /* Avoid corrupting the FPU stack on 32bit OSes. */
- lp_build_intrinsic(gallivm->builder, "llvm.x86.mmx.emms",
- LLVMVoidTypeInContext(gallivm->context), NULL, 0);
-#endif
-}
-
-
/* XXX: generic code:
*/
static void
@@ -664,49 +593,70 @@ set_noalias(LLVMBuilderRef builder,
static void
init_args(struct gallivm_state *gallivm,
- struct lp_setup_args *args,
- const struct lp_setup_variant *variant)
+ const struct lp_setup_variant_key *key,
+ struct lp_setup_args *args)
{
LLVMBuilderRef b = gallivm->builder;
+ LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
+ LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
+ LLVMValueRef e, f, ef, ooa;
+ LLVMValueRef shuffles[4];
+ LLVMValueRef attr_pos[3];
+ struct lp_type typef4 = lp_type_float_vec(32, 128);
- LLVMValueRef v0_x = vert_attrib(gallivm, args->v0, 0, 0, "v0_x");
- LLVMValueRef v0_y = vert_attrib(gallivm, args->v0, 0, 1, "v0_y");
+ /* The internal position input is in slot zero:
+ */
+ load_attribute(gallivm, args, key, 0, attr_pos);
- LLVMValueRef v1_x = vert_attrib(gallivm, args->v1, 0, 0, "v1_x");
- LLVMValueRef v1_y = vert_attrib(gallivm, args->v1, 0, 1, "v1_y");
+ pixel_center = lp_build_const_vec(gallivm, typef4,
+ key->pixel_center_half ? 0.5 : 0.0);
- LLVMValueRef v2_x = vert_attrib(gallivm, args->v2, 0, 0, "v2_x");
- LLVMValueRef v2_y = vert_attrib(gallivm, args->v2, 0, 1, "v2_y");
+ /*
+ * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
+ * also offset_tri uses actually xyz in them
+ */
+ xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
- LLVMValueRef pixel_center = lp_build_const_float(gallivm,
- variant->key.pixel_center_half ? 0.5 : 0);
+ dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
+ dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
- LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
- LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
-
- LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
- LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
- LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
- LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
+ shuffles[0] = onei;
+ shuffles[1] = zeroi;
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+
+ dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, LLVMConstVector(shuffles, 4), "");
+
+ ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
+ e = LLVMBuildExtractElement(b, ef, zeroi, "");
+ f = LLVMBuildExtractElement(b, ef, onei, "");
- LLVMValueRef one = lp_build_const_float(gallivm, 1.0);
- LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
- LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
- LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
+ ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
- LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
- LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
- LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
- LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
+ ooa = vec4f_from_scalar(gallivm, ooa, "");
+
+ /* tri offset calc shares a lot of arithmetic, do it here */
+ if (key->scale != 0.0f || key->units != 0.0f) {
+ lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+ }
- args->dy20_ooa = vec4f_from_scalar(gallivm, dy20_ooa, "dy20_ooa_4f");
- args->dy01_ooa = vec4f_from_scalar(gallivm, dy01_ooa, "dy01_ooa_4f");
+ dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
+ dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
- args->dx20_ooa = vec4f_from_scalar(gallivm, dx20_ooa, "dx20_ooa_4f");
- args->dx01_ooa = vec4f_from_scalar(gallivm, dx01_ooa, "dx01_ooa_4f");
+ args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
+ args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
- args->x0_center = vec4f_from_scalar(gallivm, x0_center, "x0_center_4f");
- args->y0_center = vec4f_from_scalar(gallivm, y0_center, "y0_center_4f");
+ args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
+ args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
+
+ args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
+ args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
+
+ /* might want to merge that with other coef emit in the future */
+ emit_position_coef(gallivm, args, 0, attr_pos);
}
/**
@@ -714,18 +664,18 @@ init_args(struct gallivm_state *gallivm,
*
*/
static struct lp_setup_variant *
-generate_setup_variant(struct gallivm_state *gallivm,
- struct lp_setup_variant_key *key,
+generate_setup_variant(struct lp_setup_variant_key *key,
struct llvmpipe_context *lp)
{
struct lp_setup_variant *variant = NULL;
+ struct gallivm_state *gallivm;
struct lp_setup_args args;
char func_name[256];
LLVMTypeRef vec4f_type;
LLVMTypeRef func_type;
LLVMTypeRef arg_types[7];
LLVMBasicBlockRef block;
- LLVMBuilderRef builder = gallivm->builder;
+ LLVMBuilderRef builder;
int64_t t0 = 0, t1;
if (0)
@@ -735,6 +685,13 @@ generate_setup_variant(struct gallivm_state *gallivm,
if (variant == NULL)
goto fail;
+ variant->gallivm = gallivm = gallivm_create();
+ if (!variant->gallivm) {
+ goto fail;
+ }
+
+ builder = gallivm->builder;
+
if (LP_DEBUG & DEBUG_COUNTERS) {
t0 = os_time_get();
}
@@ -793,14 +750,17 @@ generate_setup_variant(struct gallivm_state *gallivm,
LLVMPositionBuilderAtEnd(builder, block);
set_noalias(builder, variant->function, arg_types, Elements(arg_types));
- init_args(gallivm, &args, variant);
+ init_args(gallivm, &variant->key, &args);
emit_tri_coef(gallivm, &variant->key, &args);
- lp_emit_emms(gallivm);
LLVMBuildRetVoid(builder);
- variant->jit_function = finalize_function(gallivm, builder,
- variant->function);
+ gallivm_verify_function(gallivm, variant->function);
+
+ gallivm_compile_module(gallivm);
+
+ variant->jit_function = (lp_jit_setup_triangle)
+ gallivm_jit_function(gallivm, variant->function);
if (!variant->jit_function)
goto fail;
@@ -818,10 +778,12 @@ generate_setup_variant(struct gallivm_state *gallivm,
fail:
if (variant) {
if (variant->function) {
- if (variant->jit_function)
- LLVMFreeMachineCodeForFunction(gallivm->engine,
- variant->function);
- LLVMDeleteFunction(variant->function);
+ gallivm_free_function(gallivm,
+ variant->function,
+ variant->jit_function);
+ }
+ if (variant->gallivm) {
+ gallivm_destroy(variant->gallivm);
}
FREE(variant);
}
@@ -882,10 +844,13 @@ remove_setup_variant(struct llvmpipe_context *lp,
}
if (variant->function) {
- if (variant->jit_function)
- LLVMFreeMachineCodeForFunction(lp->gallivm->engine,
- variant->function);
- LLVMDeleteFunction(variant->function);
+ gallivm_free_function(variant->gallivm,
+ variant->function,
+ variant->jit_function);
+ }
+
+ if (variant->gallivm) {
+ gallivm_destroy(variant->gallivm);
}
remove_from_list(&variant->list_item_global);
@@ -954,7 +919,7 @@ llvmpipe_update_setup(struct llvmpipe_context *lp)
cull_setup_variants(lp);
}
- variant = generate_setup_variant(lp->gallivm, key, lp);
+ variant = generate_setup_variant(key, lp);
if (variant) {
insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
lp->nr_setup_variants++;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
index 609c4f62511..e0abe467a6d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -55,6 +55,8 @@ struct lp_setup_variant {
struct lp_setup_variant_list_item list_item_global;
+ struct gallivm_state *gallivm;
+
/* XXX: this is a pointer to the LLVM IR. Once jit_function is
* generated, we never need to use the IR again - need to find a
* way to release this data without destroying the generated
@@ -69,15 +71,6 @@ struct lp_setup_variant {
unsigned no;
};
-void lp_setup_tri_fallback( const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4],
- boolean front_facing,
- float (*a0)[4],
- float (*dadx)[4],
- float (*dady)[4],
- const struct lp_setup_variant_key *key );
-
void lp_delete_setup_variants(struct llvmpipe_context *lp);
void
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index c64f3e149fd..4b6c8a7a6a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -42,11 +42,6 @@
#include <float.h>
#include "gallivm/lp_bld.h"
-#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/BitWriter.h>
-#include <llvm-c/Transforms/Scalar.h>
#include "pipe/p_state.h"
#include "util/u_format.h"
@@ -64,14 +59,14 @@ write_tsv_header(FILE *fp);
boolean
-test_some(struct gallivm_state *gallivm,unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n);
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp);
+test_single(unsigned verbose, FILE *fp);
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp);
+test_all(unsigned verbose, FILE *fp);
#if defined(PIPE_CC_MSVC)
diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
index 45ca32f5866..6e09f7e67b0 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -53,7 +53,7 @@ write_tsv_header(FILE *fp)
}
-typedef float (*unary_func_t)(float);
+typedef void (*unary_func_t)(float *out, const float *in);
/**
@@ -180,6 +180,45 @@ const float sincos_values[] = {
5*M_PI/4,
};
+const float round_values[] = {
+ -10.0, -1, 0.0, 12.0,
+ -1.49, -0.25, 1.25, 2.51,
+ -0.99, -0.01, 0.01, 0.99,
+};
+
+static float fractf(float x)
+{
+ x -= floorf(x);
+ if (x >= 1.0f) {
+ // clamp to the largest number smaller than one
+ x = 1.0f - 0.5f*FLT_EPSILON;
+ }
+ return x;
+}
+
+
+const float fract_values[] = {
+ // http://en.wikipedia.org/wiki/IEEE_754-1985#Examples
+ 0.0f,
+ -0.0f,
+ 1.0f,
+ -1.0f,
+ 0.5f,
+ -0.5f,
+ 1.401298464324817e-45f, // smallest denormal
+ -1.401298464324817e-45f,
+ 5.88e-39f, // middle denormal
+ 1.18e-38f, // largest denormal
+ -1.18e-38f,
+ -1.62981451e-08f,
+ FLT_EPSILON,
+ -FLT_EPSILON,
+ 1.0f - 0.5f*FLT_EPSILON,
+ -1.0f + FLT_EPSILON,
+ FLT_MAX,
+ -FLT_MAX
+};
+
/*
* Unary test cases.
@@ -196,6 +235,11 @@ unary_tests[] = {
{"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values), 20.0 },
{"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values), 20.0 },
{"sgn", &lp_build_sgn, &sgnf, exp2_values, Elements(exp2_values), 20.0 },
+ {"round", &lp_build_round, &roundf, round_values, Elements(round_values), 24.0 },
+ {"trunc", &lp_build_trunc, &truncf, round_values, Elements(round_values), 24.0 },
+ {"floor", &lp_build_floor, &floorf, round_values, Elements(round_values), 24.0 },
+ {"ceil", &lp_build_ceil, &ceilf, round_values, Elements(round_values), 24.0 },
+ {"fract", &lp_build_fract_safe, &fractf, fract_values, Elements(fract_values), 24.0 },
};
@@ -204,39 +248,40 @@ unary_tests[] = {
*/
static LLVMValueRef
build_unary_test_func(struct gallivm_state *gallivm,
- LLVMModuleRef module,
- LLVMContextRef context,
const struct unary_test_t *test)
{
- struct lp_type type = lp_type_float_vec(32);
- LLVMTypeRef i32t = LLVMInt32TypeInContext(context);
- LLVMTypeRef f32t = LLVMFloatTypeInContext(context);
+ struct lp_type type = lp_type_float_vec(32, lp_native_vector_width);
+ LLVMContextRef context = gallivm->context;
+ LLVMModuleRef module = gallivm->module;
LLVMTypeRef vf32t = lp_build_vec_type(gallivm, type);
- LLVMTypeRef args[1] = { f32t };
- LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0));
- LLVMValueRef arg1 = LLVMGetParam(func, 0);
+ LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) };
+ LLVMValueRef func = LLVMAddFunction(module, test->name,
+ LLVMFunctionType(LLVMVoidTypeInContext(context),
+ args, Elements(args), 0));
+ LLVMValueRef arg0 = LLVMGetParam(func, 0);
+ LLVMValueRef arg1 = LLVMGetParam(func, 1);
LLVMBuilderRef builder = gallivm->builder;
LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
- LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
LLVMValueRef ret;
struct lp_build_context bld;
- lp_build_context_init(&bld, gallivm, lp_type_float_vec(32));
+ lp_build_context_init(&bld, gallivm, type);
LLVMSetFunctionCallConv(func, LLVMCCallConv);
LLVMPositionBuilderAtEnd(builder, block);
- /* scalar to vector */
- arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(vf32t), arg1, index0, "");
+ arg1 = LLVMBuildLoad(builder, arg1, "");
ret = test->builder(&bld, arg1);
- /* vector to scalar */
- ret = LLVMBuildExtractElement(builder, ret, index0, "");
+ LLVMBuildStore(builder, ret, arg0);
+
+ LLVMBuildRetVoid(builder);
+
+ gallivm_verify_function(gallivm, func);
- LLVMBuildRet(builder, ret);
return func;
}
@@ -245,67 +290,86 @@ build_unary_test_func(struct gallivm_state *gallivm,
* Test one LLVM unary arithmetic builder function.
*/
static boolean
-test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test)
+test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test)
{
- LLVMModuleRef module = gallivm->module;
+ struct gallivm_state *gallivm;
LLVMValueRef test_func;
- LLVMExecutionEngineRef engine = gallivm->engine;
- LLVMContextRef context = gallivm->context;
- char *error = NULL;
unary_func_t test_func_jit;
boolean success = TRUE;
- int i;
+ int i, j;
+ int length = lp_native_vector_width / 32;
+ float *in, *out;
- test_func = build_unary_test_func(gallivm, module, context, test);
+ in = align_malloc(length * 4, length * 4);
+ out = align_malloc(length * 4, length * 4);
- if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- printf("LLVMVerifyModule: %s\n", error);
- LLVMDumpModule(module);
- abort();
+ /* random NaNs or 0s could wreak havoc */
+ for (i = 0; i < length; i++) {
+ in[i] = 1.0;
}
- LLVMDisposeMessage(error);
- test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func));
+ gallivm = gallivm_create();
- for (i = 0; i < test->num_values; ++i) {
- float value = test->values[i];
- float ref = test->ref(value);
- float src = test_func_jit(value);
+ test_func = build_unary_test_func(gallivm, test);
- double error = fabs(src - ref);
- double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG;
+ gallivm_compile_module(gallivm);
- bool pass = precision >= test->precision;
+ test_func_jit = (unary_func_t) gallivm_jit_function(gallivm, test_func);
- if (isnan(ref)) {
- continue;
- }
+ for (j = 0; j < (test->num_values + length - 1) / length; j++) {
+ int num_vals = ((j + 1) * length <= test->num_values) ? length :
+ test->num_values % length;
- if (!pass || verbose) {
- printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n",
- test->name, value, ref, src, precision,
- pass ? "PASS" : "FAIL");
+ for (i = 0; i < num_vals; ++i) {
+ in[i] = test->values[i+j*length];
}
- if (!pass) {
- success = FALSE;
+ test_func_jit(out, in);
+ for (i = 0; i < num_vals; ++i) {
+ float ref = test->ref(in[i]);
+ double error, precision;
+ bool pass;
+
+ error = fabs(out[i] - ref);
+ precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG;
+
+ pass = precision >= test->precision;
+
+ if (isnan(ref)) {
+ continue;
+ }
+
+ if (!pass || verbose) {
+ printf("%s(%.9g): ref = %.9g, out = %.9g, precision = %f bits, %s\n",
+ test->name, in[i], ref, out[i], precision,
+ pass ? "PASS" : "FAIL");
+ }
+
+ if (!pass) {
+ success = FALSE;
+ }
}
}
- LLVMFreeMachineCodeForFunction(engine, test_func);
+ gallivm_free_function(gallivm, test_func, test_func_jit);
+
+ gallivm_destroy(gallivm);
+
+ align_free(in);
+ align_free(out);
return success;
}
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_all(unsigned verbose, FILE *fp)
{
boolean success = TRUE;
int i;
for (i = 0; i < Elements(unary_tests); ++i) {
- if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) {
+ if (!test_unary(verbose, fp, &unary_tests[i])) {
success = FALSE;
}
}
@@ -315,19 +379,19 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n)
{
/*
* Not randomly generated test cases, so test all.
*/
- return test_all(gallivm, verbose, fp);
+ return test_all(verbose, fp);
}
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_single(unsigned verbose, FILE *fp)
{
return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 51324cbb6a3..37b37fda40e 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -36,6 +36,7 @@
* @author Brian Paul <brian@vmware.com>
*/
+#include "util/u_memory.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_type.h"
@@ -53,19 +54,6 @@ enum vector_mode
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
-/** cast wrapper */
-static blend_test_ptr_t
-voidptr_to_blend_test_ptr_t(void *p)
-{
- union {
- void *v;
- blend_test_ptr_t f;
- } u;
- u.v = p;
- return u.f;
-}
-
-
void
write_tsv_header(FILE *fp)
@@ -468,50 +456,43 @@ compute_blend_ref(const struct pipe_blend_state *blend,
PIPE_ALIGN_STACK
static boolean
-test_one(struct gallivm_state *gallivm,
- unsigned verbose,
+test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
struct lp_type type)
{
- LLVMModuleRef module = gallivm->module;
+ struct gallivm_state *gallivm;
LLVMValueRef func = NULL;
- LLVMExecutionEngineRef engine = gallivm->engine;
- char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
const unsigned n = LP_TEST_NUM_SAMPLES;
int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned i, j;
- void *code;
+ const unsigned stride = lp_type_width(type)/8;
if(verbose >= 1)
dump_blend_type(stdout, blend, mode, type);
- func = add_blend_test(gallivm, blend, mode, type);
+ gallivm = gallivm_create();
- if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- LLVMDumpModule(module);
- abort();
- }
- LLVMDisposeMessage(error);
+ func = add_blend_test(gallivm, blend, mode, type);
- code = LLVMGetPointerToGlobal(engine, func);
- blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
+ gallivm_compile_module(gallivm);
- if(verbose >= 2)
- lp_disassemble(code);
+ blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);
success = TRUE;
- for(i = 0; i < n && success; ++i) {
- if(mode == AoS) {
- PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
+ if(mode == AoS) {
+ uint8_t *src, *dst, *con, *res, *ref;
+ src = align_malloc(stride, stride);
+ dst = align_malloc(stride, stride);
+ con = align_malloc(stride, stride);
+ res = align_malloc(stride, stride);
+ ref = align_malloc(stride, stride);
+
+ for(i = 0; i < n && success; ++i) {
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -569,14 +550,21 @@ test_one(struct gallivm_state *gallivm,
fprintf(stderr, "\n");
}
}
-
- if(mode == SoA) {
- const unsigned stride = type.length*type.width/8;
- PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
- PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
+ align_free(src);
+ align_free(dst);
+ align_free(con);
+ align_free(res);
+ align_free(ref);
+ }
+ else if(mode == SoA) {
+ uint8_t *src, *dst, *con, *res, *ref;
+ src = align_malloc(4*stride, stride);
+ dst = align_malloc(4*stride, stride);
+ con = align_malloc(4*stride, stride);
+ res = align_malloc(4*stride, stride);
+ ref = align_malloc(4*stride, stride);
+
+ for(i = 0; i < n && success; ++i) {
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
@@ -651,6 +639,11 @@ test_one(struct gallivm_state *gallivm,
}
}
}
+ align_free(src);
+ align_free(dst);
+ align_free(con);
+ align_free(res);
+ align_free(ref);
}
/*
@@ -687,16 +680,9 @@ test_one(struct gallivm_state *gallivm,
if(fp)
write_tsv_row(fp, blend, mode, type, cycles_avg, success);
- if (!success) {
- if(verbose < 2)
- LLVMDumpModule(module);
- LLVMWriteBitcodeToFile(module, "blend.bc");
- fprintf(stderr, "blend.bc written\n");
- fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
- abort();
- }
+ gallivm_free_function(gallivm, func, blend_test_ptr);
- LLVMFreeMachineCodeForFunction(engine, func);
+ gallivm_destroy(gallivm);
return success;
}
@@ -753,7 +739,7 @@ const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_all(unsigned verbose, FILE *fp)
{
const unsigned *rgb_func;
const unsigned *rgb_src_factor;
@@ -789,7 +775,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
blend.rt[0].colormask = PIPE_MASK_RGBA;
- if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
+ if(!test_one(verbose, fp, &blend, mode, *type))
success = FALSE;
}
@@ -806,7 +792,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n)
{
const unsigned *rgb_func;
@@ -849,7 +835,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
blend.rt[0].colormask = PIPE_MASK_RGBA;
- if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
+ if(!test_one(verbose, fp, &blend, mode, *type))
success = FALSE;
}
@@ -858,7 +844,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_single(unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 0dcb5422887..71d45bd5ce7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,21 +142,21 @@ add_conv_test(struct gallivm_state *gallivm,
LLVMBuildRetVoid(builder);;
+ gallivm_verify_function(gallivm, func);
+
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_one(struct gallivm_state *gallivm, unsigned verbose,
+test_one(unsigned verbose,
FILE *fp,
struct lp_type src_type,
struct lp_type dst_type)
{
- LLVMModuleRef module = gallivm->module;
- LLVMExecutionEngineRef engine = gallivm->engine;
+ struct gallivm_state *gallivm;
LLVMValueRef func = NULL;
- char *error = NULL;
conv_test_ptr_t conv_test_ptr;
boolean success;
const unsigned n = LP_TEST_NUM_SAMPLES;
@@ -166,10 +166,18 @@ test_one(struct gallivm_state *gallivm, unsigned verbose,
unsigned num_dsts;
double eps;
unsigned i, j;
- void *code;
- if (src_type.width * src_type.length != dst_type.width * dst_type.length &&
- src_type.length != dst_type.length) {
+ if ((src_type.width >= dst_type.width && src_type.length > dst_type.length) ||
+ (src_type.width <= dst_type.width && src_type.length < dst_type.length)) {
+ return TRUE;
+ }
+
+ /* Known failures
+ * - fixed point 32 -> float 32
+ * - float 32 -> signed normalised integer 32
+ */
+ if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) ||
+ (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) {
return TRUE;
}
@@ -183,7 +191,7 @@ test_one(struct gallivm_state *gallivm, unsigned verbose,
}
if(verbose >= 1)
- dump_conv_types(stdout, src_type, dst_type);
+ dump_conv_types(stderr, src_type, dst_type);
if (src_type.length > dst_type.length) {
num_srcs = 1;
@@ -203,29 +211,20 @@ test_one(struct gallivm_state *gallivm, unsigned verbose,
eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));
- func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts);
+ gallivm = gallivm_create();
- if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- LLVMDumpModule(module);
- abort();
- }
- LLVMDisposeMessage(error);
-
- if(verbose >= 2)
- LLVMDumpModule(module);
+ func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts);
- code = LLVMGetPointerToGlobal(engine, func);
- conv_test_ptr = (conv_test_ptr_t)pointer_to_func(code);
+ gallivm_compile_module(gallivm);
- if(verbose >= 2)
- lp_disassemble(code);
+ conv_test_ptr = (conv_test_ptr_t)gallivm_jit_function(gallivm, func);
success = TRUE;
for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8;
- PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
- PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0;
@@ -320,20 +319,9 @@ test_one(struct gallivm_state *gallivm, unsigned verbose,
if(fp)
write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
- if (!success) {
- static boolean firsttime = TRUE;
- if(firsttime) {
- if(verbose < 2)
- LLVMDumpModule(module);
- LLVMWriteBitcodeToFile(module, "conv.bc");
- fprintf(stderr, "conv.bc written\n");
- fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
- firsttime = FALSE;
- /* abort(); */
- }
- }
+ gallivm_free_function(gallivm, func, conv_test_ptr);
- LLVMFreeMachineCodeForFunction(engine, func);
+ gallivm_destroy(gallivm);
return success;
}
@@ -348,18 +336,33 @@ const struct lp_type conv_types[] = {
{ TRUE, FALSE, FALSE, TRUE, 32, 4 },
{ TRUE, FALSE, FALSE, FALSE, 32, 4 },
+ { TRUE, FALSE, TRUE, TRUE, 32, 8 },
+ { TRUE, FALSE, TRUE, FALSE, 32, 8 },
+ { TRUE, FALSE, FALSE, TRUE, 32, 8 },
+ { TRUE, FALSE, FALSE, FALSE, 32, 8 },
+
/* Fixed */
{ FALSE, TRUE, TRUE, TRUE, 32, 4 },
{ FALSE, TRUE, TRUE, FALSE, 32, 4 },
{ FALSE, TRUE, FALSE, TRUE, 32, 4 },
{ FALSE, TRUE, FALSE, FALSE, 32, 4 },
+ { FALSE, TRUE, TRUE, TRUE, 32, 8 },
+ { FALSE, TRUE, TRUE, FALSE, 32, 8 },
+ { FALSE, TRUE, FALSE, TRUE, 32, 8 },
+ { FALSE, TRUE, FALSE, FALSE, 32, 8 },
+
/* Integer */
{ FALSE, FALSE, TRUE, TRUE, 32, 4 },
{ FALSE, FALSE, TRUE, FALSE, 32, 4 },
{ FALSE, FALSE, FALSE, TRUE, 32, 4 },
{ FALSE, FALSE, FALSE, FALSE, 32, 4 },
+ { FALSE, FALSE, TRUE, TRUE, 32, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 32, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 32, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 32, 8 },
+
{ FALSE, FALSE, TRUE, TRUE, 16, 8 },
{ FALSE, FALSE, TRUE, FALSE, 16, 8 },
{ FALSE, FALSE, FALSE, TRUE, 16, 8 },
@@ -381,7 +384,7 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_all(unsigned verbose, FILE *fp)
{
const struct lp_type *src_type;
const struct lp_type *dst_type;
@@ -394,7 +397,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
if(src_type == dst_type)
continue;
- if(!test_one(gallivm, verbose, fp, *src_type, *dst_type)){
+ if(!test_one(verbose, fp, *src_type, *dst_type)){
success = FALSE;
++error_count;
}
@@ -408,7 +411,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n)
{
const struct lp_type *src_type;
@@ -423,7 +426,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
dst_type = &conv_types[rand() % num_types];
} while (src_type == dst_type || src_type->norm != dst_type->norm);
- if(!test_one(gallivm, verbose, fp, *src_type, *dst_type))
+ if(!test_one(verbose, fp, *src_type, *dst_type))
success = FALSE;
}
@@ -432,7 +435,7 @@ test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_single(unsigned verbose, FILE *fp)
{
/* float, fixed, sign, norm, width, len */
struct lp_type f32x4_type =
@@ -442,7 +445,7 @@ test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
boolean success;
- success = test_one(gallivm, verbose, fp, f32x4_type, ub8x4_type);
+ success = test_one(verbose, fp, f32x4_type, ub8x4_type);
return success;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index daf6ded29c7..34cbdbdd630 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -83,7 +83,6 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
LLVMContextRef context = gallivm->context;
LLVMModuleRef module = gallivm->module;
LLVMBuilderRef builder = gallivm->builder;
- LLVMPassManagerRef passmgr = gallivm->passmgr;
LLVMTypeRef args[4];
LLVMValueRef func;
LLVMValueRef packed_ptr;
@@ -120,16 +119,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
LLVMBuildRetVoid(builder);
- if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
- LLVMDumpValue(func);
- abort();
- }
-
- LLVMRunFunctionPassManager(passmgr, func);
-
- if (verbose >= 1) {
- LLVMDumpValue(func);
- }
+ gallivm_verify_function(gallivm, func);
return func;
}
@@ -137,26 +127,24 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
PIPE_ALIGN_STACK
static boolean
-test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_format_float(unsigned verbose, FILE *fp,
const struct util_format_description *desc)
{
+ struct gallivm_state *gallivm;
LLVMValueRef fetch = NULL;
- LLVMExecutionEngineRef engine = gallivm->engine;
fetch_ptr_t fetch_ptr;
PIPE_ALIGN_VAR(16) float unpacked[4];
boolean first = TRUE;
boolean success = TRUE;
unsigned i, j, k, l;
- void *f;
+
+ gallivm = gallivm_create();
fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
- f = LLVMGetPointerToGlobal(engine, fetch);
- fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+ gallivm_compile_module(gallivm);
- if (verbose >= 2) {
- lp_disassemble(f);
- }
+ fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch);
for (l = 0; l < util_format_nr_test_cases; ++l) {
const struct util_format_test_case *test = &util_format_test_cases[l];
@@ -171,25 +159,35 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
for (i = 0; i < desc->block.height; ++i) {
for (j = 0; j < desc->block.width; ++j) {
- boolean match;
+ boolean match = TRUE;
memset(unpacked, 0, sizeof unpacked);
fetch_ptr(unpacked, test->packed, j, i);
- match = TRUE;
- for(k = 0; k < 4; ++k)
- if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
+ for(k = 0; k < 4; ++k) {
+ if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
match = FALSE;
+ }
+
+ if (util_is_double_nan(test->unpacked[i][j][k]) != util_is_nan(unpacked[k])) {
+ match = FALSE;
+ }
+
+ if (!util_is_double_inf_or_nan(test->unpacked[i][j][k]) &&
+ fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) {
+ match = FALSE;
+ }
+ }
if (!match) {
printf("FAILED\n");
printf(" Packed: %02x %02x %02x %02x\n",
test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
- printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
+ printf(" Unpacked (%u,%u): %.9g %.9g %.9g %.9g obtained\n",
j, i,
unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
- printf(" %f %f %f %f expected\n",
+ printf(" %.9g %.9g %.9g %.9g expected\n",
test->unpacked[i][j][0],
test->unpacked[i][j][1],
test->unpacked[i][j][2],
@@ -201,14 +199,9 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
}
}
- if (!success) {
- if (verbose < 1) {
- LLVMDumpValue(fetch);
- }
- }
+ gallivm_free_function(gallivm, fetch, fetch_ptr);
- LLVMFreeMachineCodeForFunction(engine, fetch);
- LLVMDeleteFunction(fetch);
+ gallivm_destroy(gallivm);
if(fp)
write_tsv_row(fp, desc, success);
@@ -219,26 +212,24 @@ test_format_float(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
PIPE_ALIGN_STACK
static boolean
-test_format_unorm8(struct gallivm_state *gallivm,
- unsigned verbose, FILE *fp,
+test_format_unorm8(unsigned verbose, FILE *fp,
const struct util_format_description *desc)
{
+ struct gallivm_state *gallivm;
LLVMValueRef fetch = NULL;
fetch_ptr_t fetch_ptr;
uint8_t unpacked[4];
boolean first = TRUE;
boolean success = TRUE;
unsigned i, j, k, l;
- void *f;
+
+ gallivm = gallivm_create();
fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
- f = LLVMGetPointerToGlobal(gallivm->engine, fetch);
- fetch_ptr = (fetch_ptr_t) pointer_to_func(f);
+ gallivm_compile_module(gallivm);
- if (verbose >= 2) {
- lp_disassemble(f);
- }
+ fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch);
for (l = 0; l < util_format_nr_test_cases; ++l) {
const struct util_format_test_case *test = &util_format_test_cases[l];
@@ -285,6 +276,7 @@ test_format_unorm8(struct gallivm_state *gallivm,
float_to_ubyte(test->unpacked[i][j][1]),
float_to_ubyte(test->unpacked[i][j][2]),
float_to_ubyte(test->unpacked[i][j][3]));
+
success = FALSE;
}
}
@@ -292,11 +284,9 @@ test_format_unorm8(struct gallivm_state *gallivm,
}
}
- if (!success)
- LLVMDumpValue(fetch);
+ gallivm_free_function(gallivm, fetch, fetch_ptr);
- LLVMFreeMachineCodeForFunction(gallivm->engine, fetch);
- LLVMDeleteFunction(fetch);
+ gallivm_destroy(gallivm);
if(fp)
write_tsv_row(fp, desc, success);
@@ -308,17 +298,16 @@ test_format_unorm8(struct gallivm_state *gallivm,
static boolean
-test_one(struct gallivm_state *gallivm,
- unsigned verbose, FILE *fp,
+test_one(unsigned verbose, FILE *fp,
const struct util_format_description *format_desc)
{
boolean success = TRUE;
- if (!test_format_float(gallivm, verbose, fp, format_desc)) {
+ if (!test_format_float(verbose, fp, format_desc)) {
success = FALSE;
}
- if (!test_format_unorm8(gallivm, verbose, fp, format_desc)) {
+ if (!test_format_unorm8(verbose, fp, format_desc)) {
success = FALSE;
}
@@ -327,7 +316,7 @@ test_one(struct gallivm_state *gallivm,
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_all(unsigned verbose, FILE *fp)
{
enum pipe_format format;
boolean success = TRUE;
@@ -359,7 +348,7 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
continue;
}
- if (!test_one(gallivm, verbose, fp, format_desc)) {
+ if (!test_one(verbose, fp, format_desc)) {
success = FALSE;
}
}
@@ -369,15 +358,15 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n)
{
- return test_all(gallivm, verbose, fp);
+ return test_all(verbose, fp);
}
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_single(unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index d229c620310..4c610923146 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -39,6 +39,7 @@
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
#include "lp_test.h"
@@ -369,7 +370,6 @@ int main(int argc, char **argv)
unsigned i;
boolean success;
boolean single = FALSE;
- struct gallivm_state *gallivm;
for(i = 1; i < argc; ++i) {
if(strcmp(argv[i], "-v") == 0)
@@ -384,23 +384,28 @@ int main(int argc, char **argv)
lp_build_init();
- gallivm = gallivm_create();
+#ifdef DEBUG
+ if (verbose >= 2) {
+ gallivm_debug |= GALLIVM_DEBUG_IR;
+ gallivm_debug |= GALLIVM_DEBUG_ASM;
+ }
+#endif
util_cpu_detect();
if(fp) {
/* Warm up the caches */
- test_some(gallivm, 0, NULL, 100);
+ test_some(0, NULL, 100);
write_tsv_header(fp);
}
if (single)
- success = test_single(gallivm, verbose, fp);
+ success = test_single(verbose, fp);
else if (n)
- success = test_some(gallivm, verbose, fp, n);
+ success = test_some(verbose, fp, n);
else
- success = test_all(gallivm, verbose, fp);
+ success = test_all(verbose, fp);
if(fp)
fclose(fp);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c
index 620cdb57c13..c483de94d40 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_printf.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c
@@ -78,66 +78,61 @@ add_printf_test(struct gallivm_state *gallivm)
LLVMBuildRetVoid(builder);
+ gallivm_verify_function(gallivm, func);
+
return func;
}
PIPE_ALIGN_STACK
static boolean
-test_printf(struct gallivm_state *gallivm,
- unsigned verbose, FILE *fp,
+test_printf(unsigned verbose, FILE *fp,
const struct printf_test_case *testcase)
{
- LLVMExecutionEngineRef engine = gallivm->engine;
- LLVMModuleRef module = gallivm->module;
+ struct gallivm_state *gallivm;
LLVMValueRef test;
- char *error = NULL;
test_printf_t test_printf_func;
boolean success = TRUE;
- void *code;
- test = add_printf_test(gallivm);
+ gallivm = gallivm_create();
- if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- LLVMDumpModule(module);
- abort();
- }
- LLVMDisposeMessage(error);
+ test = add_printf_test(gallivm);
- code = LLVMGetPointerToGlobal(engine, test);
- test_printf_func = (test_printf_t) pointer_to_func(code);
+ gallivm_compile_module(gallivm);
- // LLVMDumpModule(module);
+ test_printf_func = (test_printf_t) gallivm_jit_function(gallivm, test);
test_printf_func(0);
- LLVMFreeMachineCodeForFunction(engine, test);
+ gallivm_free_function(gallivm, test, test_printf_func);
+
+ gallivm_destroy(gallivm);
return success;
}
boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_all(unsigned verbose, FILE *fp)
{
boolean success = TRUE;
- test_printf(gallivm, verbose, fp, NULL);
+ test_printf(verbose, fp, NULL);
return success;
}
boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+test_some(unsigned verbose, FILE *fp,
unsigned long n)
{
- return test_all(gallivm, verbose, fp);
+ return test_all(verbose, fp);
}
boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+test_single(unsigned verbose, FILE *fp)
{
printf("no test_single()");
return TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c
deleted file mode 100644
index fc3edf372d5..00000000000
--- a/src/gallium/drivers/llvmpipe/lp_test_round.c
+++ /dev/null
@@ -1,242 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "util/u_pointer.h"
-#include "gallivm/lp_bld.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_arit.h"
-
-#include "lp_test.h"
-
-
-void
-write_tsv_header(FILE *fp)
-{
- fprintf(fp,
- "result\t"
- "format\n");
-
- fflush(fp);
-}
-
-
-#ifdef PIPE_ARCH_SSE
-
-# include <emmintrin.h>
-
-typedef __m128 (*test_round_t)(__m128);
-
-typedef LLVMValueRef (*lp_func_t)(struct lp_build_context *, LLVMValueRef);
-
-
-static LLVMValueRef
-add_test(struct gallivm_state *gallivm, const char *name, lp_func_t lp_func)
-{
- LLVMModuleRef module = gallivm->module;
- LLVMContextRef context = gallivm->context;
- LLVMBuilderRef builder = gallivm->builder;
-
- LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatTypeInContext(context), 4);
- LLVMTypeRef args[1] = { v4sf };
- LLVMValueRef func = LLVMAddFunction(module, name, LLVMFunctionType(v4sf, args, 1, 0));
- LLVMValueRef arg1 = LLVMGetParam(func, 0);
- LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
- LLVMValueRef ret;
- struct lp_build_context bld;
-
- lp_build_context_init(&bld, gallivm, lp_float32_vec4_type());
-
- LLVMSetFunctionCallConv(func, LLVMCCallConv);
-
- LLVMPositionBuilderAtEnd(builder, block);
-
- ret = lp_func(&bld, arg1);
-
- LLVMBuildRet(builder, ret);
-
- return func;
-}
-
-static void
-printv(char* string, __m128 value)
-{
- __m128 v = value;
- float *f = (float *)&v;
- printf("%s: %10f %10f %10f %10f\n", string,
- f[0], f[1], f[2], f[3]);
-}
-
-static boolean
-compare(__m128 x, __m128 y)
-{
- boolean success = TRUE;
- float *xp = (float *) &x;
- float *yp = (float *) &y;
- if (xp[0] != yp[0] ||
- xp[1] != yp[1] ||
- xp[2] != yp[2] ||
- xp[3] != yp[3]) {
- printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n");
- success = FALSE;
- }
- return success;
-}
-
-
-
-PIPE_ALIGN_STACK
-static boolean
-test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
-{
- LLVMModuleRef module = gallivm->module;
- LLVMValueRef test_round = NULL, test_trunc, test_floor, test_ceil;
- LLVMExecutionEngineRef engine = gallivm->engine;
- char *error = NULL;
- test_round_t round_func, trunc_func, floor_func, ceil_func;
- float unpacked[4];
- boolean success = TRUE;
- int i;
-
- test_round = add_test(gallivm, "round", lp_build_round);
- test_trunc = add_test(gallivm, "trunc", lp_build_trunc);
- test_floor = add_test(gallivm, "floor", lp_build_floor);
- test_ceil = add_test(gallivm, "ceil", lp_build_ceil);
-
- if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- printf("LLVMVerifyModule: %s\n", error);
- LLVMDumpModule(module);
- abort();
- }
- LLVMDisposeMessage(error);
-
- round_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_round));
- trunc_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_trunc));
- floor_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_floor));
- ceil_func = (test_round_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_ceil));
-
- memset(unpacked, 0, sizeof unpacked);
-
- if (0)
- LLVMDumpModule(module);
-
- for (i = 0; i < 3; i++) {
- /* NOTE: There are several acceptable rules for x.5 rounding: ceiling,
- * nearest even, etc. So we avoid testing such corner cases here.
- */
- __m128 xvals[3] = {
- {-10.0, -1, 0, 12.0},
- {-1.49, -0.25, 1.25, 2.51},
- {-0.99, -0.01, 0.01, 0.99}
- };
- __m128 x = xvals[i];
- __m128 y, ref;
- float *xp = (float *) &x;
- float *refp = (float *) &ref;
-
- printf("\n");
- printv("x ", x);
-
- refp[0] = round(xp[0]);
- refp[1] = round(xp[1]);
- refp[2] = round(xp[2]);
- refp[3] = round(xp[3]);
- y = round_func(x);
- printv("C round(x) ", ref);
- printv("LLVM round(x)", y);
- success = success && compare(ref, y);
-
- refp[0] = trunc(xp[0]);
- refp[1] = trunc(xp[1]);
- refp[2] = trunc(xp[2]);
- refp[3] = trunc(xp[3]);
- y = trunc_func(x);
- printv("C trunc(x) ", ref);
- printv("LLVM trunc(x)", y);
- success = success && compare(ref, y);
-
- refp[0] = floor(xp[0]);
- refp[1] = floor(xp[1]);
- refp[2] = floor(xp[2]);
- refp[3] = floor(xp[3]);
- y = floor_func(x);
- printv("C floor(x) ", ref);
- printv("LLVM floor(x)", y);
- success = success && compare(ref, y);
-
- refp[0] = ceil(xp[0]);
- refp[1] = ceil(xp[1]);
- refp[2] = ceil(xp[2]);
- refp[3] = ceil(xp[3]);
- y = ceil_func(x);
- printv("C ceil(x) ", ref);
- printv("LLVM ceil(x) ", y);
- success = success && compare(ref, y);
- }
-
- LLVMFreeMachineCodeForFunction(engine, test_round);
- LLVMFreeMachineCodeForFunction(engine, test_trunc);
- LLVMFreeMachineCodeForFunction(engine, test_floor);
- LLVMFreeMachineCodeForFunction(engine, test_ceil);
-
- return success;
-}
-
-#else /* !PIPE_ARCH_SSE */
-
-static boolean
-test_round(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
-{
- return TRUE;
-}
-
-#endif /* !PIPE_ARCH_SSE */
-
-
-boolean
-test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
-{
- return test_round(gallivm, verbose, fp);
-}
-
-
-boolean
-test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
- unsigned long n)
-{
- return test_all(gallivm, verbose, fp);
-}
-
-boolean
-test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
-{
- printf("no test_single()");
- return TRUE;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index daa96f20c7e..9151e427ba7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -178,8 +178,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
+ const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *texel)
@@ -189,7 +188,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
assert(unit < PIPE_MAX_SAMPLERS);
if (LP_PERF & PERF_NO_TEX) {
- lp_build_sample_nop(gallivm, type, texel);
+ lp_build_sample_nop(gallivm, type, num_coords, coords, texel);
return;
}
@@ -199,7 +198,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
type,
unit,
num_coords, coords,
- ddx, ddy,
+ derivs,
lod_bias, explicit_lod,
texel);
}
@@ -210,6 +209,7 @@ lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
static void
lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
struct gallivm_state *gallivm,
+ struct lp_type type,
unsigned unit,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *sizes_out)
@@ -221,6 +221,7 @@ lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
lp_build_size_query_soa(gallivm,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
+ type,
unit,
explicit_lod,
sizes_out);