{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# STATS790 Project"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.base import clone\n",
    "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
    "from sklearn.tree import DecisionTreeRegressor \n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics.cluster import adjusted_rand_score\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import math\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.datasets import make_regression\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.metrics import f1_score\n",
    "from itertools import product\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "from scipy.special import expit\n",
    "import itertools"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CART (Regression tree)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I implemented the Classification and Regression Tree (CART) algorithm based on this blog  (ref: https://insidelearningmachines.com/build-a-decision-tree-in-python/). Implementing CART using object-oriented programming (OOP) was a logical choice, as it provides a clear and easily understandable structure. In my implementation, I created two classes: one for nodes and the other for the tree, which essentially comprises of nodes. The node class represents the decision points in the tree, and the tree class encapsulates the entire decision-making process. This OOP-based implementation of CART resulted in a highly organized and readable codebase, making it easy to modify and extend as needed. Additionally, I have added a leaf node ID system. This system assigns a unique ID to every newly created leaf node, making it easy to access individual nodes. This feature offers significant benefits to our gradient boosting algorithm as it allows us to access the leaf nodes quickly and efficiently. With this enhancement, our algorithm can operate at a higher level of performance, providing faster and more accurate results.\n",
    " \n",
    "The default CART implementation in scikit-learn cannot handle missing values, and traditional CART algorithms use surrogate splits to handle missing values. However, surrogate splits are computationally expensive and do not guarantee an improvement in effectiveness, as there may not be similar features to replace the missing values. To overcome this limitation, we adopted the naive approach used in xgboost and lightGBM, which directly assigns missing values to nodes with lower sum square error.\n",
    "\n",
    "Since CART is a binary tree, each split only considers two nodes. If there are n missing values, there are 2^n ways to assign them to these two nodes. For each missing value, there are two options: either it goes into the first node or the second node. Thus, the total number of ways to assign the n missing values is the product of the number of options for each value, which is 2 * 2 * ... * 2 (n times), or simply $2^n$. In our implementation, we simply iterate all possibile combinations and find the best split."
   ]
  },
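  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a small standalone illustration of the $2^n$ enumeration described above (the arrays here are made up purely for the example), the following sketch lists every way of assigning two rows with a missing split feature to the left and right subsets and keeps the assignment with the lowest total sum of squared errors, exactly as `grow_tree_missing` does with `itertools.product`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy illustration of the 2^n missing-value assignment (standalone example with assumed data)\n",
    "left = np.array([[1.0, 10.0]])        # rows already in the left child (feature, response)\n",
    "right = np.array([[3.0, 30.0]])       # rows already in the right child\n",
    "missing = np.array([[np.nan, 18.0],   # rows whose split feature is missing\n",
    "                    [np.nan, 25.0]])\n",
    "\n",
    "def toy_sse(data):\n",
    "    # sum of squared errors of the response column, as in Tree.sse\n",
    "    return np.sum((data[:, -1] - np.mean(data[:, -1]))**2)\n",
    "\n",
    "best_err, best_combo = np.inf, None\n",
    "# each combo is a tuple of 0/1 flags: 0 -> send the row left, 1 -> send it right\n",
    "for combo in itertools.product([0, 1], repeat=missing.shape[0]):\n",
    "    l, r = left.copy(), right.copy()\n",
    "    for flag, row in zip(combo, missing):\n",
    "        if flag == 0:\n",
    "            l = np.vstack([l, row])\n",
    "        else:\n",
    "            r = np.vstack([r, row])\n",
    "    err = toy_sse(l) + toy_sse(r)\n",
    "    if err < best_err:\n",
    "        best_err, best_combo = err, combo\n",
    "print(\"best assignment (0=left, 1=right):\", best_combo, \"error:\", best_err)"
   ]
  },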
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Node(object):\n",
    "    '''\n",
    "    Class of tree node\n",
    "    '''\n",
    "\n",
    "    def __init__(self, split_column = None, split_value = None, leaf_value = None, depth = None):\n",
    "        self.split_column = split_column  # split feature (None if the node is leaf node)\n",
    "        self.split_value = split_value    # split value in the current split feature (None if the node is leaf node)\n",
    "        self.leaf_value = leaf_value      # leaf value (None if the node is decision node)\n",
    "        self.depth = depth                # depth of the node\n",
    "        self.left = None                  # left child of the decision node (None if the node is leaf node)\n",
    "        self.right = None                 # right child of the decision node (None if the node is leaf node)\n",
    "        self.data = None                  # data points assigned to the leaf node (None if the node is decision node)\n",
    "        self.leaf_id = None               # leaf node ID (i.e 1,2,3...n) (None if the node is decision node)\n",
    "        \n",
    "    def update_parameters(self,split_column,split_value):\n",
    "        '''\n",
    "        Function to update the split feature and split value of the node \n",
    "\n",
    "        Inputs:\n",
    "            split_column - split feature\n",
    "            split_value  - split value in the current split feature\n",
    "        '''\n",
    "        self.split_column = split_column\n",
    "        self.split_value = split_value\n",
    "\n",
    "    def get_parameters(self):\n",
    "        '''\n",
    "        Function to get parameters of the node \n",
    "\n",
    "        Outputs:\n",
    "            split_column - split feature\n",
    "            split_value  - split value in the current split feature\n",
    "        '''\n",
    "        return self.split_column, self.split_value\n",
    "    \n",
    "    def set_children(self, left_child, right_child):\n",
    "        '''\n",
    "        Function to set parameters of the node \n",
    "\n",
    "        Inputs:\n",
    "            left_child - left child of the decision node \n",
    "            split_value  - right child of the decision node \n",
    "        '''\n",
    "        self.left = left_child\n",
    "        self.right = right_child\n",
    "\n",
    "    def get_left_child(self):\n",
    "        '''\n",
    "        Function to get left child of the node \n",
    "\n",
    "        Outputs:\n",
    "            left_child - left child of the decision node \n",
    "        '''\n",
    "        return self.left\n",
    "    \n",
    "    def get_right_child(self):\n",
    "        '''\n",
    "        Function to get right child of the node \n",
    "\n",
    "        Outputs:\n",
    "            right_child - right child of the decision node \n",
    "        '''\n",
    "        return self.right\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Tree:\n",
    "    '''\n",
    "    Class of decision tree\n",
    "    '''\n",
    "\n",
    "    def __init__(self, max_depth, min_sample):\n",
    "        self.max_depth = max_depth     # maximum depth of the tree\n",
    "        self.min_sample = min_sample   # minimum number of samples of the tree\n",
    "        self.decisionTree = None       # tree structure (all nodes of the tree)\n",
    "        self.leaf_nodes = []           # a list records all leaf nodes\n",
    "        self.current_id = 0            # a vairable used for updating leaf ID (each leaf node have its unique ID start from 0)\n",
    "        self.ids = []                  # a list records all leaf nodes IDs\n",
    "        self.leaf_values = []          # a list records all leaf values (for fast access)\n",
    "    \n",
    "    \n",
    "    def sse(self, data):\n",
    "        '''\n",
    "        Sum of square error used for spliting node\n",
    "\n",
    "        Inputs:\n",
    "            data - current dataset\n",
    "        Ouputs:\n",
    "            sum of square error\n",
    "        '''\n",
    "        return np.sum((data[:,-1] - np.mean(data[:,-1]))**2)\n",
    "        \n",
    "    def leaf_value(self,data):\n",
    "        '''\n",
    "        Function to compute the leaf value of each leaf node (Mean of all response varaibles of the current dataset)\n",
    "\n",
    "        Inputs:\n",
    "            data - current dataset\n",
    "        Ouputs:\n",
    "            leaf value\n",
    "        '''\n",
    "        return np.mean(data[:,-1])\n",
    "    \n",
    "    def grow_tree(self, node, data, depth = 0):\n",
    "        '''\n",
    "        Recursive function to grow the tree                                 \n",
    "\n",
    "        Inputs:\n",
    "            node  - current node\n",
    "            data  - current dataset\n",
    "            depth - current depth of the tree (default start from 0)\n",
    "\n",
    "        '''\n",
    "\n",
    "        # If the not satisfy leaf node condition:\n",
    "        #  1. not reach max depth. \n",
    "        #  2. number of data points in current node lager than the threshold\n",
    "        # then keep spliting\n",
    "        if depth < self.max_depth - 1 and data.shape[0] > self.min_sample and len(np.unique(data[:,-1])) != 1:\n",
    "            num_features = data.shape[1] - 1  # Compute number of features\n",
    "            split_column = None                \n",
    "            split_value = None\n",
    "            left_data = None      # initialize left subset of data after split \n",
    "            right_data = None     # initialize right subset of data after split \n",
    "            split_Error = np.inf  # initialize total error after split\n",
    "            # looping all possible features/values\n",
    "            for feature in range(num_features):\n",
    "                for value in np.unique(data[:,feature]):\n",
    "                    # split the dataset into left and right based on current features/values\n",
    "                    left_indices = data[:, feature] <= value  \n",
    "                    right_indices = data[:, feature] > value\n",
    "                    left_data_temp = data[left_indices]\n",
    "                    right_data_temp = data[right_indices]\n",
    "                    # make sure the number of datapints in each node after split satisfy the threshold\n",
    "                    if left_data_temp.shape[0] >= self.min_sample and right_data_temp.shape[0] >= self.min_sample:\n",
    "                        # compute the error (impurity)\n",
    "                        err = self.sse(left_data_temp) + self.sse(right_data_temp)\n",
    "                        # update the error, split feature/value if it is better\n",
    "                        if err < split_Error:\n",
    "                            split_column = feature\n",
    "                            split_value = value\n",
    "                            split_Error = err\n",
    "                            left_data = left_data_temp\n",
    "                            right_data = right_data_temp\n",
    "            # If split successfully\n",
    "            if split_column is not None:\n",
    "                left_node = Node()   # Create new left child node object\n",
    "                right_node = Node()  # Create new right child node object\n",
    "                # Update parameters and connect left and right child to the current node\n",
    "                node.update_parameters(split_column,split_value)\n",
    "                node.set_children(left_node, right_node)\n",
    "                # Keep grow tree recursively for using left and right data after split \n",
    "                self.grow_tree(node.get_left_child(),left_data,depth+1)\n",
    "                self.grow_tree(node.get_right_child(),right_data,depth+1)\n",
    "            # If split not successfully, then set the node as leaf node\n",
    "            else: \n",
    "                # Make current node as leaf node\n",
    "                node.leaf_value = self.leaf_value(data)  # Compute the leaf value\n",
    "                node.data = data   # assign the data points belong to it                \n",
    "                node.leaf_id = self.current_id  # set the leaf ID\n",
    "                self.ids.append(self.current_id)  \n",
    "                self.leaf_values.append(node.leaf_value)\n",
    "                self.current_id += 1  # Update next ID\n",
    "                self.leaf_nodes.append(node)\n",
    "                return\n",
    "        # If reach the maximum depth, or number of data points in current node smaller or equal than the threshold\n",
    "        else:\n",
    "            # Make current node as leaf node\n",
    "            node.leaf_value = self.leaf_value(data)\n",
    "            node.data = data\n",
    "            node.leaf_id = self.current_id\n",
    "            self.ids.append(self.current_id)\n",
    "            self.leaf_values.append(node.leaf_value)\n",
    "            self.current_id += 1\n",
    "            self.leaf_nodes.append(node)\n",
    "            return\n",
    "\n",
    "    def grow_tree_missing(self, node, data, depth = 0):\n",
    "        '''\n",
    "        Recursive function to grow the tree specially used for handling dataset with missing value                                \n",
    "\n",
    "        Inputs:\n",
    "            node  - current node\n",
    "            data  - current dataset\n",
    "            depth - current depth of the tree (default start from 0)\n",
    "\n",
    "        '''\n",
    "\n",
    "        # If the not satisfy leaf node condition:\n",
    "        #  1. not reach max depth. \n",
    "        #  2. number of data points in current node lager than the threshold\n",
    "        # then keep spliting\n",
    "        if depth < self.max_depth - 1 and data.shape[0] > self.min_sample and len(np.unique(data[:,-1])) != 1:\n",
    "            num_features = data.shape[1] - 1  # Compute number of features\n",
    "            split_column = None                \n",
    "            split_value = None\n",
    "            left_data = None      # initialize left subset of data after split \n",
    "            right_data = None     # initialize right subset of data after split \n",
    "            split_Error = np.inf  # initialize total error after split\n",
    "            # looping all possible features/values\n",
    "            for feature in range(num_features):\n",
    "                #col_i = data[:,feature] # all feature values (column) include Nan\n",
    "                #col_i_no_na = col_i[~np.isnan(col_i)]  # filter Nan\n",
    "                has_nan_indices = np.where(np.isnan(data[:, feature]))[0]\n",
    "                no_nan_indices = np.where(~np.isnan(data[:, feature]))[0]\n",
    "                has_nan = data[has_nan_indices, :]\n",
    "                no_nan = data[no_nan_indices, :]    \n",
    "\n",
    "                for value in np.unique(no_nan[:,feature]):\n",
    "                    # split the dataset into left and right based on current features/values\n",
    "                    left_indices = data[:, feature] <= value  \n",
    "                    right_indices = data[:, feature] > value\n",
    "                    left_data_temp = data[left_indices]\n",
    "                    right_data_temp = data[right_indices]\n",
    "\n",
    "                    # Generate all possible combinations of assigning the new rows to the two dataframes\n",
    "                    combos = list(itertools.product([0, 1], repeat=has_nan.shape[0]))\n",
    "                    # Loop over each combination\n",
    "                    for combo in combos:\n",
    "                        # Create a copy of the original dataframes to modify\n",
    "                        df1_copy = left_data_temp.copy()\n",
    "                        df2_copy = right_data_temp.copy()\n",
    "\n",
    "                        # Assign the new rows to df1 or df2 based on the combo\n",
    "                        for i, row in enumerate(has_nan):\n",
    "                            if combo[i] == 0:\n",
    "                                df1_copy = np.vstack([df1_copy, row])\n",
    "                            else:\n",
    "                                df2_copy = np.vstack([df2_copy, row])\n",
    "\n",
    "                        # make sure the number of datapints in each node after split satisfy the threshold\n",
    "                        if df1_copy.shape[0] >= self.min_sample and df2_copy.shape[0] >= self.min_sample:\n",
    "                            # compute the error (impurity)\n",
    "                            err = self.sse(df1_copy) + self.sse(df2_copy)\n",
    "                            # update the error, split feature/value if it is better\n",
    "                            if err < split_Error:\n",
    "                                split_column = feature\n",
    "                                split_value = value\n",
    "                                split_Error = err\n",
    "                                left_data = df1_copy\n",
    "                                right_data = df2_copy\n",
    "            # If split successfully\n",
    "            if split_column is not None:\n",
    "                left_node = Node()   # Create new left child node object\n",
    "                right_node = Node()  # Create new right child node object\n",
    "                # Update parameters and connect left and right child to the current node\n",
    "                node.update_parameters(split_column,split_value)\n",
    "                node.set_children(left_node, right_node)\n",
    "                # Keep grow tree recursively for using left and right data after split \n",
    "                self.grow_tree(node.get_left_child(),left_data,depth+1)\n",
    "                self.grow_tree(node.get_right_child(),right_data,depth+1)\n",
    "            # If split not successfully, then set the node as leaf node\n",
    "            else: \n",
    "                # Make current node as leaf node\n",
    "                node.leaf_value = self.leaf_value(data)  # Compute the leaf value\n",
    "                node.data = data   # assign the data points belong to it                \n",
    "                node.leaf_id = self.current_id  # set the leaf ID\n",
    "                self.ids.append(self.current_id)  \n",
    "                self.leaf_values.append(node.leaf_value)\n",
    "                self.current_id += 1  # Update next ID\n",
    "                self.leaf_nodes.append(node)\n",
    "                return\n",
    "        # If reach the maximum depth, or number of data points in current node smaller or equal than the threshold\n",
    "        else:\n",
    "            # Make current node as leaf node\n",
    "            node.leaf_value = self.leaf_value(data)\n",
    "            node.data = data\n",
    "            node.leaf_id = self.current_id\n",
    "            self.ids.append(self.current_id)\n",
    "            self.leaf_values.append(node.leaf_value)\n",
    "            self.current_id += 1\n",
    "            self.leaf_nodes.append(node)\n",
    "            return\n",
    "        \n",
    "    def fit(self,X,y):\n",
    "        '''\n",
    "        Function to train the data to build the CART                               \n",
    "\n",
    "        Inputs:\n",
    "            X  - Predictor variables\n",
    "            y  - Response variables\n",
    "        '''\n",
    "\n",
    "        # Merge Predictor variables and Response variables into one dataset\n",
    "        data = np.concatenate((X,y.reshape(-1,1)),axis=1)  \n",
    "        # set the root node of the tree\n",
    "        self.decisionTree = Node()\n",
    "        # grow the tree\n",
    "        self.grow_tree(self.decisionTree,data,0)\n",
    "    \n",
    "    def fit_missing(self,X,y):\n",
    "        '''\n",
    "        Function to train the data to build the CART specially used for handling dataset with missing value                             \n",
    "\n",
    "        Inputs:\n",
    "            X  - Predictor variables\n",
    "            y  - Response variables\n",
    "        '''\n",
    "\n",
    "        # Merge Predictor variables and Response variables into one dataset\n",
    "        data = np.concatenate((X,y.reshape(-1,1)),axis=1)  \n",
    "        # set the root node of the tree\n",
    "        self.decisionTree = Node()\n",
    "        # grow the tree\n",
    "        self.grow_tree_missing(self.decisionTree,data,0)\n",
    "    \n",
    "    def find_leaf(self,decisionTree,x):\n",
    "        '''\n",
    "        Function to find the leaf node that the input data point belong to, and return the predict leaf value                      \n",
    "\n",
    "        Inputs:\n",
    "            decisionTree  - the trained CART\n",
    "            x             - one data point\n",
    "        Output:\n",
    "            leaf value\n",
    "        '''\n",
    "\n",
    "        # If it is not the leaf node, then traverse the tree recursively (go to child nodes)\n",
    "        if decisionTree.leaf_value == None:\n",
    "            split_column, split_value = decisionTree.get_parameters()\n",
    "            if x[split_column] <= split_value:\n",
    "                return self.find_leaf(decisionTree.get_left_child(),x)   # go left if smaller or equal than split value\n",
    "            else:\n",
    "                return self.find_leaf(decisionTree.get_right_child(),x)  # go right if greater than split value\n",
    "        # if it is leaf node, return leaf value\n",
    "        else:\n",
    "            return decisionTree.leaf_value\n",
    "    \n",
    "    def predict(self,X):\n",
    "        '''\n",
    "        Prediction function based on the trained CART                     \n",
    "\n",
    "        Inputs:\n",
    "            X  - predictor dataset\n",
    "        Output:\n",
    "            array of predicted values\n",
    "        '''\n",
    "\n",
    "        res = []\n",
    "        # go through each data points and make prediction\n",
    "        for i in range(X.shape[0]):\n",
    "            res.append(self.find_leaf(self.decisionTree,X[i]))\n",
    "        \n",
    "        return np.array(res).flatten()\n",
    "    \n",
    "\n",
    "    def find_leaf_id(self,decisionTree,x):\n",
    "        '''\n",
    "        Function to find leaf ID of a data point based on the trained CART                     \n",
    "\n",
    "        Inputs:\n",
    "            decisionTree  - the trained CART\n",
    "            x             - one data point\n",
    "        Output:\n",
    "            leaf ID\n",
    "        '''\n",
    "\n",
    "        # If it is not the leaf node, then traverse the tree recursively (go to child nodes)\n",
    "        if decisionTree.leaf_value == None:\n",
    "            split_column, split_value = decisionTree.get_parameters()\n",
    "            if x[split_column] <= split_value:\n",
    "                return self.find_leaf_id(decisionTree.get_left_child(),x)\n",
    "            else:\n",
    "                return self.find_leaf_id(decisionTree.get_right_child(),x)\n",
    "        # if it is leaf node, return leaf value\n",
    "        else:\n",
    "            return decisionTree.leaf_id\n",
    "\n",
    "    def apply(self,X):\n",
    "        '''\n",
    "        Function to find all leaf ID of a dataset based on the trained CART                     \n",
    "\n",
    "        Inputs:\n",
    "            X  - predictor dataset\n",
    "        Output:\n",
    "            array of leaf ID each data point belong to\n",
    "        '''\n",
    "        \n",
    "        res = []\n",
    "        # go through each data points and make prediction\n",
    "        for i in range(X.shape[0]):\n",
    "            res.append(self.find_leaf_id(self.decisionTree,X[i]))\n",
    "        \n",
    "        return np.array(res).flatten()"
   ]
  },
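  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check of the leaf-ID system described earlier, the following sketch (with toy data generated here purely for illustration) fits a small tree, maps each training point to its leaf ID with `apply`, and overwrites one leaf value through `leaf_nodes`/`leaf_values`, which is the same access pattern the gradient boosting implementation uses when it updates leaf values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy demonstration of the leaf-ID system (assumed data, illustrative only)\n",
    "X_toy, y_toy = make_regression(n_samples=30, n_features=3, random_state=0)\n",
    "\n",
    "toy_tree = Tree(max_depth=3, min_sample=2)\n",
    "toy_tree.fit(X_toy, y_toy)\n",
    "\n",
    "leaf_ids = toy_tree.apply(X_toy)   # leaf ID of every training point\n",
    "print(\"leaf IDs:\", np.unique(leaf_ids))\n",
    "print(\"leaf values:\", toy_tree.leaf_values)\n",
    "\n",
    "# overwrite the value of leaf 0 directly, as the GBDT training loop does\n",
    "toy_tree.leaf_nodes[0].leaf_value = 0.0\n",
    "toy_tree.leaf_values[0] = 0.0"
   ]
  },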
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Artificial regression dataset\n",
    "In this section, we create an artificial regression dataset to evaluate and compare the performance of both our implemented CART model and the CART model from scikit-learn. I test them on 10 random regression dataset, we can see the performance are similar. The CART scikit-learn are slightly better thanour implemented CART."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RMSE for our implemented CART in 10 random dataset [196.87157998975198, 206.79586271247052, 154.5985378655847, 120.73831934706669, 169.22009224003475, 198.79538375320283, 166.2302440144937, 125.31812261226318, 158.39924279843459, 181.20175540652903]\n",
      "RMSE for scikit-learn CART in 10 random dataset [208.85739802125946, 191.40829984128482, 140.46410584408977, 115.69282443677845, 177.5959540784621, 216.47153238147644, 177.39888598042444, 133.10915197915398, 146.43188358955544, 191.93575290183048]\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "random_seeds = [random.randint(0, 10000) for i in range(10)]  # generate 10 random seeds\n",
    "\n",
    "# container to store the RMSE from our implemented CART\n",
    "rmse_custom = []\n",
    "# container to store the RMSE from scikit-learn CART\n",
    "rmse_scikit = []\n",
    "\n",
    "for i in random_seeds:\n",
    "    # generate regression dataset for random seed i, and do train/test split\n",
    "    X, y = make_regression(n_samples=100, n_features=10, random_state=i)\n",
    "    X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y, test_size=0.3)\n",
    "\n",
    "    # train and predict our implemented CART\n",
    "    rgt = Tree(max_depth=5, min_sample = 3)\n",
    "    rgt.fit(X_train_reg,y_train_reg)\n",
    "    y_pred_reg = rgt.predict(X_test_reg)\n",
    "    rmse_custom.append(np.sqrt(mean_squared_error(y_test_reg,y_pred_reg)))  # append to list\n",
    "\n",
    "    # train and predict using CART from scikit learn\n",
    "    rgt_scikit = DecisionTreeRegressor(max_depth=5, min_samples_split = 3, min_samples_leaf = 3)\n",
    "    rgt_scikit.fit(X_train_reg,y_train_reg)\n",
    "    y_pred_reg_scikit = rgt_scikit.predict(X_test_reg)\n",
    "    rmse_scikit.append(np.sqrt(mean_squared_error(y_test_reg,y_pred_reg_scikit)))  # append to list\n",
    "\n",
    "print(\"RMSE for our implemented CART in 10 random dataset \" + str(rmse_custom))\n",
    "print(\"RMSE for scikit-learn CART in 10 random dataset \" + str(rmse_scikit))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradient Boosting Decision Tree (GBDT)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I implemented Gradient Boosting Decision Tree (GBDT) for binary classification based on this blog (reference:https://towardsdatascience.com/all-you-need-to-know-about-gradient-boosting-algorithm-part-2-classification-d3ed8f56541e). Additionally, I also implemented GBDT for multiclass classificaiton. "
   ]
  },
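  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The update rules implemented below follow the standard GBDT derivation for the logistic loss, as in the referenced blog. For binary classification, the initial prediction is the log-odds of the training labels, $F_0 = \\log\\frac{\\bar{y}}{1-\\bar{y}}$. At iteration $m$, the residuals are $r_{im} = y_i - p_i$ with $p_i = \\sigma(F_{m-1}(x_i))$, a regression tree is fit to the residuals, and the value of each leaf region $R_j$ is replaced by\n",
    "\n",
    "$$\\gamma_j = \\frac{\\sum_{i \\in R_j} r_{im}}{\\sum_{i \\in R_j} p_i(1-p_i)},$$\n",
    "\n",
    "after which the model is updated as $F_m(x) = F_{m-1}(x) + \\nu \\gamma_{j(x)}$, where $\\nu$ is the learning rate. For multiclass classification with $K$ classes, one tree per class is fit to the residuals of the softmax probabilities, and the corresponding leaf value computed in the code is $\\gamma_{jk} = \\frac{K-1}{K} \\cdot \\frac{\\sum_{i \\in R_{jk}} r_{ikm}}{\\sum_{i \\in R_{jk}} |r_{ikm}|(1-|r_{ikm}|)}$."
   ]
  },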
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class gradient_boosting(object):\n",
    "    '''\n",
    "    Class of gradient boosting decision tree (GBDT)\n",
    "    '''\n",
    "\n",
    "    def __init__(self,M,base_model=\"CART\",max_depth = 1,min_sample = 1,learning_rate = 0.1, method = \"binary classification\",loss = \"logistic\", missing_value = False,tol = None):\n",
    "        self.M = M                           # number of trees\n",
    "        self.base_model = base_model         # base classifier (default is CART)\n",
    "        self.max_depth = max_depth           # maximum depth of the base classifier (max depth for CART)\n",
    "        self.min_sample = min_sample         # minimum number of sample in a node\n",
    "        self.learning_rate = learning_rate   # learning rate \n",
    "        self.method = method                 # usage of gradient boosting (binary or multiclass classification)\n",
    "        self.loss = loss                     # loss function\n",
    "        self.missing_value = missing_value   # If there's missing value in the dataset\n",
    "        self.tol = tol                       # tolerance\n",
    "        self.trees = []                      # a list record M trees in generated in gradient boosting\n",
    "        self.num_class = None                # number of classes (this is only used in multiclass classification)\n",
    "        self.col_names = None                # column names\n",
    "    \n",
    "    def softmax(self,x):\n",
    "        '''\n",
    "        Softmax function (used for multiclass classification)                    \n",
    "\n",
    "        Inputs:\n",
    "            x  - one dimensional array of data\n",
    "        Output:\n",
    "            probability of each element of the array\n",
    "        '''\n",
    "\n",
    "        e_x = math.e**(x - np.max(x, axis=1, keepdims=True))\n",
    "        return e_x / np.sum(e_x, axis=1, keepdims=True)\n",
    "\n",
    "    def logit(self,x):\n",
    "        '''\n",
    "        Logit function (used for binary classification)                    \n",
    "\n",
    "        Inputs:\n",
    "            x  - one dimensional array of data\n",
    "        Output:\n",
    "            probability of each element of the array\n",
    "        '''\n",
    "        return expit(x)   #1/(1 + np.exp(-x))\n",
    "\n",
    "    def fit(self,x,y):\n",
    "        '''\n",
    "        Function to train GBDT                              \n",
    "\n",
    "        Inputs:\n",
    "            x  - Predictor variables\n",
    "            y  - Response variables\n",
    "        '''\n",
    "\n",
    "        ##binary classification#############################################################################################\n",
    "\n",
    "        if self.method == \"binary classification\":\n",
    "\n",
    "            self.num_class = 2\n",
    "            # initial prediction\n",
    "            F0 = np.log(np.mean(y)/(1-np.mean(y)))\n",
    "            self.F0 = np.array([F0]*len(y))\n",
    "            # predict value after mth tree, we make it equal to the initial prediction, it will keep updating when grow a new tree\n",
    "            fm = self.F0.copy()\n",
    "    \n",
    "            # grow M trees\n",
    "            for m in range(self.M):\n",
    "                # compute residual\n",
    "                p = self.logit(fm)\n",
    "                r_im = y - p\n",
    "                if self.base_model == \"CART\":\n",
    "                    # fit a CART implemented in previous section using residual\n",
    "                    tree = Tree(max_depth = self.max_depth, min_sample = self.min_sample)  # set max depth and min number of sample for CART\n",
    "                    if self.missing_value == True:\n",
    "                        tree.fit_missing(x.values, r_im.values)  # train CART in missing value mode\n",
    "                    else:\n",
    "                        tree.fit(x.values, r_im.values)  # train CART in normal mode\n",
    "                    nodes = tree.apply(x.values)     # find corresponding leaf ID of each data point based on the trained CART\n",
    "                    \n",
    "                    # go through all leaf nodes\n",
    "                    for i in tree.ids:\n",
    "                        # find subset of data points belong to node i\n",
    "                        sub = np.equal(i, nodes)\n",
    "                        #compute the leaf value for node i\n",
    "                        denominator = (np.sum(p[sub]*(1-p[sub])))\n",
    "                        if np.abs(denominator) <= 1e-150:\n",
    "                            gamma = 0\n",
    "                        else:\n",
    "                            gamma = (np.sum(r_im[sub]))/denominator\n",
    "                        # update predict value for all data points belong to node i\n",
    "                        fm[sub] += self.learning_rate*gamma\n",
    "                        # update leaf value\n",
    "                        tree.leaf_values[i] = gamma\n",
    "                        tree.leaf_nodes[i].leaf_value = gamma\n",
    "                    # append current tree to the list of tree\n",
    "                    self.trees.append(tree)\n",
    "\n",
    "        ##multiclass classification#########################################################################################\n",
    "\n",
    "        one_hot = None # store one-hot encoding for response variable\n",
    "        if self.method == \"multiclass classification\":     \n",
    "            self.num_class = y.nunique() # count number of classes\n",
    "            one_hot = pd.get_dummies(y)  # one-hot encoding\n",
    "            self.col_names = one_hot.columns.tolist()  # get column names\n",
    "        \n",
    "            # initial prediction\n",
    "            F0 = []\n",
    "            for i in range(self.num_class):\n",
    "                coli = one_hot.iloc[:,i]   # get column for ith class in one-hot encoding\n",
    "                num_class_i = coli.value_counts()[1]  # count number of 1's \n",
    "                F0.append(np.array([num_class_i/len(y)]*len(y)))  # Calculate the proportion of ith class \n",
    "            \n",
    "            self.F0 = np.array(F0)\n",
    "            # predict value after mth tree, we make it equal to the initial prediction, it will keep updating when grow a new tree\n",
    "            fm = self.F0.copy()\n",
    "\n",
    "            # grow M trees\n",
    "            for m in range(self.M):\n",
    "                if self.base_model == \"CART\":\n",
    "                    # a list to store k trees for k classes \n",
    "                    ktrees = [] \n",
    "                    # compute softmax\n",
    "                    p = self.softmax(fm.T).T\n",
    "                    # go through all classes and fit CART for the residual\n",
    "                    for k in range(self.num_class):\n",
    "                        p_km = p[k] # predict value for kth tree\n",
    "                        r_km = np.array(one_hot.iloc[:,k]) - p_km # compute residual\n",
    "\n",
    "                        # fit kth CART tree\n",
    "                        tree = Tree(max_depth = self.max_depth, min_sample = self.min_sample)  # set max depth and min number of sample for CART\n",
    "                        if self.missing_value == True:\n",
    "                            tree.fit_missing(x.values, r_km)      # train CART\n",
    "                        else:\n",
    "                            tree.fit(x.values, r_km)      # train CART\n",
    "                        nodes = tree.apply(x.values)  # find corresponding leaf ID of each data point based on the trained CART\n",
    "\n",
    "                        # go through all leaf nodes\n",
    "                        for i in tree.ids:\n",
    "                            # find subset of data points belong to node i\n",
    "                            sub = np.equal(i, nodes)\n",
    "                            \n",
    "                            #compute the leaf value for node i\n",
    "                            denominator = (np.sum(np.abs(r_km[sub])*(1-np.abs(r_km[sub]))))\n",
    "                            if np.abs(denominator) <= 1e-150:   # if the denominator is too small, set gamma equal to 0. This is to avoid dividing zero error\n",
    "                                gamma = 0\n",
    "                            else:                            \n",
    "                                gamma = ((self.num_class-1)/self.num_class) * (np.sum(r_km[sub])/denominator)\n",
    "                            # update predict value for all data points belong to node i\n",
    "                            fm[k][sub] += self.learning_rate*gamma\n",
    "                            # update leaf value\n",
    "                            tree.leaf_values[i] = gamma\n",
    "                            tree.leaf_nodes[i].leaf_value = gamma\n",
    "                        ktrees.append(tree)\n",
    "                    self.trees.append(ktrees)\n",
    "\n",
    "    def predict_prob(self,x):\n",
    "        '''\n",
    "        Function to predict probabilities based on the GBDT                     \n",
    "\n",
    "        Inputs:\n",
    "            X  - predictor dataset\n",
    "        Output:\n",
    "            array of predicted probabilities\n",
    "        '''\n",
    "        \n",
    "        if self.method == \"binary classification\":\n",
    "\n",
    "            # important! resize the initial prediction as the input size\n",
    "            Fm = np.resize(self.F0, x.shape[0])\n",
    "            # go through each tree and adding up the predictions\n",
    "            for m in range(self.M):\n",
    "                Fm += self.learning_rate * self.trees[m].predict(x.values)\n",
    "            # Output the predict probabilities\n",
    "            prob = self.logit(Fm)\n",
    "        \n",
    "            return prob\n",
    "\n",
    "        if self.method == \"multiclass classification\":\n",
    "\n",
    "            # important! resize the initial prediction as the input size\n",
    "            F0_pred = []\n",
    "            for i in range(self.num_class):\n",
    "                F0_pred.append(np.resize(self.F0[i], x.shape[0]))\n",
    "            Fm = np.array(F0_pred)\n",
    "\n",
    "            # go through each tree and adding up the predictions\n",
    "            for m in range(self.M):\n",
    "                for k in range(self.num_class):\n",
    "                    Fm[k] += self.learning_rate * self.trees[m][k].predict(x.values)\n",
    "\n",
    "            # compute the probability\n",
    "            p = self.softmax(Fm.T).T\n",
    "            # store the probability in a dataframe\n",
    "            prob = pd.DataFrame(columns=self.col_names)\n",
    "            for k in range(self.num_class):\n",
    "                prob[prob.columns[k]] = p[k]\n",
    "\n",
    "            return prob\n",
    "\n",
    "\n",
    "    def predict(self,x):\n",
    "        '''\n",
    "        Function to predict labels based on the GBDT                     \n",
    "\n",
    "        Inputs:\n",
    "            X  - predictor dataset\n",
    "        Output:\n",
    "            array of prediction labels\n",
    "        '''\n",
    "        \n",
    "        if self.method == \"binary classification\":\n",
    "            # important! resize the initial prediction as the input size\n",
    "            Fm = np.resize(self.F0, x.shape[0])\n",
    "            # go through each tree and adding up the predictions\n",
    "            for m in range(self.M):\n",
    "                Fm += self.learning_rate * self.trees[m].predict(x.values)\n",
    "            # Output the predict probabilities\n",
    "            prob = self.logit(Fm)\n",
    "\n",
    "            # return the predict labels\n",
    "            return (prob > 0.5).astype(int)\n",
    "\n",
    "\n",
    "        if self.method == \"multiclass classification\":\n",
    "            # important! resize the initial prediction as the input size\n",
    "            F0_pred = []\n",
    "            for i in range(self.num_class):\n",
    "                F0_pred.append(np.resize(self.F0[i], x.shape[0]))\n",
    "            Fm = np.array(F0_pred)\n",
    "\n",
    "            # go through each tree and adding up the predictions\n",
    "            for m in range(self.M):\n",
    "                for k in range(self.num_class):\n",
    "                    Fm[k] += self.learning_rate * self.trees[m][k].predict(x.values)\n",
    "\n",
    "            # compute the probability\n",
    "            p = self.softmax(Fm.T).T\n",
    "            # store the probability in a dataframe\n",
    "            prob = pd.DataFrame(columns=self.col_names)\n",
    "            for k in range(self.num_class):\n",
    "                prob[prob.columns[k]] = p[k]\n",
    "\n",
    "            return self.max_col_name(prob)\n",
    "        \n",
    "\n",
    "    def max_col_name(self,df):\n",
    "        '''\n",
    "        Function to find the class with maximum probability as the prediction label                     \n",
    "\n",
    "        Inputs:\n",
    "            df  - probability dataframe generate in predict function\n",
    "        Output:\n",
    "            array of prediction labels\n",
    "        '''\n",
    "        \n",
    "        # Create a list to store the column names of the maximum values\n",
    "        col_names = []\n",
    "\n",
    "        # Loop through each row in the DataFrame\n",
    "        for index, row in df.iterrows():\n",
    "            # Find the name of the column with the maximum value in the row\n",
    "            max_col = row.idxmax()\n",
    "            # Append the column name to the list\n",
    "            col_names.append(max_col)\n",
    "\n",
    "        # Return the list of column names\n",
    "        return col_names\n",
    "    \n",
    "    "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## K-Fold Cross Validation"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following is the k-fold cross-validation function used for our implemented GBDT. Different metrics, such as accuracy, F1 score, and ROC, can be applied. Also, we used this in the later hyperparameter tuning function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def kfoldcrossGBDT(trainx,trainy,M,max_depth,min_sample,learning_rate,method,kfold=10, missing_value = False, scoring = \"acc\"):\n",
    "    '''\n",
    "    Function to perform K-Fold Cross Validation for GBDT                      \n",
    "\n",
    "    Inputs:\n",
    "        trainx        -    predictor variables of training dataset\n",
    "        trainy        -    response variables of training dataset\n",
    "        M             -    Number of CART to grow\n",
    "        max_depth     -    maximum depth of the base classifier (max depth for CART)\n",
    "        min_sample    -    minimum number of sample in a node\n",
    "        learning_rate -    learning rate\n",
    "        method        -    binary or multiclass classification\n",
    "        kfold         -    Number of folds want to use\n",
    "        scoring       -    validity index (accuracy, adjusted rand index, f1 score, ROC)\n",
    "        \n",
    "    Output:\n",
    "        average score and its standard deviation\n",
    "    '''\n",
    "\n",
    "    kf = KFold(n_splits=kfold) # k splits\n",
    "    iter1 = kf.split(trainx)\n",
    "\n",
    "    # store the score for each split\n",
    "    acc = []\n",
    "    ari = []\n",
    "    f1 = []\n",
    "    roc_auc = []\n",
    "\n",
    "    while True:\n",
    "        try:\n",
    "            # next split:\n",
    "            i = next(iter1)\n",
    "            # split data\n",
    "            train_x = trainx.iloc[i[0]]\n",
    "            train_y = trainy.iloc[i[0]]\n",
    "            test_x =  trainx.iloc[i[1]]         \n",
    "            test_y =  trainy.iloc[i[1]] \n",
    "            #reset index\n",
    "            train_x.reset_index(drop=True, inplace=True)\n",
    "            test_x.reset_index(drop=True, inplace=True)\n",
    "            train_y.reset_index(drop=True, inplace=True)\n",
    "            test_y.reset_index(drop=True, inplace=True)\n",
    "\n",
    "            #train GBDT and predict\n",
    "            GBDT = gradient_boosting(M=M,max_depth = max_depth,min_sample = min_sample,learning_rate = learning_rate,method=method,missing_value = missing_value)\n",
    "            GBDT.fit(train_x,train_y)\n",
    "            y_pred = GBDT.predict(test_x)\n",
    "            \n",
    "            # record the score\n",
    "            acc.append(accuracy_score(y_pred, test_y))\n",
    "            ari.append(adjusted_rand_score(y_pred, test_y))\n",
    "            f1.append(f1_score(y_pred, test_y, average='weighted'))\n",
    "            if method == \"binary classification\":\n",
    "                fpr, tpr, thresholds = roc_curve(test_y, y_pred)\n",
    "                roc_auc.append(auc(fpr, tpr))\n",
    "            \n",
    "        except StopIteration:\n",
    "            break\n",
    "    \n",
    "    if scoring == \"acc\":\n",
    "        return [np.mean(acc),np.std(acc)]\n",
    "    if scoring == \"ari\":\n",
    "        return [np.mean(ari),np.std(ari)]\n",
    "    if scoring == \"f1\":\n",
    "        return [np.mean(f1),np.std(f1)]\n",
    "    if scoring == \"roc\":\n",
    "        return [np.mean(roc_auc),np.std(roc_auc)]\n"
   ]
  },
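  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal usage sketch of `kfoldcrossGBDT` on a small synthetic dataset (the data and parameter values here are illustrative assumptions, not part of the experiments below):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative only: cross-validate a small GBDT on assumed synthetic data\n",
    "from sklearn.datasets import make_classification\n",
    "\n",
    "X_demo, y_demo = make_classification(n_samples=60, n_features=5, random_state=0)\n",
    "X_demo, y_demo = pd.DataFrame(X_demo), pd.Series(y_demo)\n",
    "\n",
    "mean_acc, std_acc = kfoldcrossGBDT(X_demo, y_demo, M=5, max_depth=3, min_sample=3,\n",
    "                                   learning_rate=0.5, method=\"binary classification\",\n",
    "                                   kfold=5, scoring=\"acc\")\n",
    "print(\"mean accuracy:\", mean_acc, \"std:\", std_acc)"
   ]
  },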
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hyperparameter tunning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def grid_search_cv_GBDT(hyperparams, trainx,trainy, method, kfold=10, scoring = \"acc\"):\n",
    "    '''\n",
    "    Function to do hyperparameter tunning using grid search for GBDT                      \n",
    "\n",
    "    Inputs:\n",
    "        hyperparams:  -    dictionary of hyperparameters and their possible values\n",
    "        trainx        -    predictor variables of training dataset\n",
    "        trainy        -    response variables of training dataset\n",
    "        method        -    binary or multiclass classification\n",
    "        kfold         -    Number of folds want to use\n",
    "        scoring       -    validity index (accuracy, adjusted rand index, f1 score, ROC)\n",
    "        \n",
    "    Output:\n",
    "        Best parameter set and its score\n",
    "    '''\n",
    "\n",
    "    best_params = None  # Best parameter set\n",
    "    best_score = 0      # Best score\n",
    "\n",
    "    # Iterate over all combinations of hyperparameters\n",
    "    for params in product(*hyperparams.values()):\n",
    "\n",
    "        # Create a dictionary of hyperparameters\n",
    "        param_dict = dict(zip(hyperparams.keys(), params))\n",
    "        print(\"Current parameter set: \" + str(param_dict))\n",
    "\n",
    "        # We have 4 parameters need to tune in our implemented GBDT, get them from the current dictionary\n",
    "        M = param_dict.get('M')\n",
    "        max_depth = param_dict.get('max_depth')\n",
    "        min_sample = param_dict.get('min_sample')\n",
    "        learning_rate = param_dict.get('learning_rate')\n",
    "\n",
    "        # compute the cross validation score of current parameter set\n",
    "        score = kfoldcrossGBDT(trainx,trainy,M ,max_depth,min_sample,learning_rate,method = method,kfold=kfold, scoring = scoring)\n",
    "        print(str(scoring) + \":\" + str(score[0]))\n",
    "\n",
    "        # If it is better, then update it as the current best parameter set\n",
    "        if score[0] > best_score:\n",
    "            best_params = param_dict\n",
    "            best_score = score[0]\n",
    "\n",
    "    return best_params, best_score"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Heart Dataset (Binary classification)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This Heat Disease Dataset was compiled in 1988 and comprises four separate databases: Cleveland, Hungary, Switzerland, and Long Beach V. It comprises 76 different attributes, including the predicted attribute, but in all published experiments, only a subset of 14 attributes were utilized. The dataset contains 1025 instances with 9 categorical attributes: Sex, Chest pain type, fasting blood sugar, thal, resting electrocardiographic result, maximum heart rate, exercise-induced angina, the slope, and the number of major vessels, and 4 numerical attributes: Age, Resting blood pressure, serum cholestoral, and ST depression. The \"target\" field specifically indicates whether a patient has heart disease, with a value of 0 representing the absence of the disease and 1 indicating its presence.\n",
    "\n",
    "dataset reference: https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "##Data preprocessing##\n",
    "\n",
    "df_heart = pd.read_csv('D:\\\\STATS4T06\\\\Datasets\\\\heartdf.csv', index_col=False)\n",
    "# drop first column which is the ID\n",
    "df_heart = df_heart.drop(df_heart.columns[0], axis=1)\n",
    "\n",
    "X = df_heart.drop('target', axis=1)\n",
    "y = df_heart['target']\n",
    "\n",
    "# normalize numerical attributes\n",
    "scaler = MinMaxScaler()\n",
    "num_cols = ['age','trestbps','chol','thalach','oldpeak']\n",
    "X[num_cols] = scaler.fit_transform(X[num_cols])\n",
    "\n",
    "# perform one-hot encoding for categorical attributes\n",
    "X = pd.get_dummies(X, columns=['slope','ca','thal'])\n",
    "\n",
    "\n",
    "# perform train-test split\n",
    "X_train_heart, X_test_heart, y_train_heart, y_test_heart = train_test_split(X, y, test_size=0.3, random_state=4)\n",
    "# reset index\n",
    "X_train_heart = X_train_heart.reset_index(drop=True)\n",
    "X_test_heart = X_test_heart.reset_index(drop=True)\n",
    "y_train_heart = y_train_heart.reset_index(drop=True)\n",
    "y_test_heart = y_test_heart.reset_index(drop=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.8228730822873082\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.8688981868898186\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.8423988842398885\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.8228730822873082\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.8688981868898186\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.8423988842398885\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8228730822873082\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.8688981868898186\n",
      "Current parameter set: {'M': 10, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.8423988842398885\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.8298465829846583\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.8786610878661087\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.8800557880055789\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.8298465829846583\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.8758716875871687\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.8953974895397491\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8298465829846583\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.8870292887029289\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.8814504881450489\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.9191073919107392\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.9470013947001394\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9553695955369595\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.9177126917712691\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.9623430962343096\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9595536959553695\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8953974895397488\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.9665271966527196\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9637377963737795\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.8591352859135286\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.8744769874476988\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.909344490934449\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.8591352859135286\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.8842398884239889\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.902370990237099\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8591352859135286\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.8800557880055787\n",
      "Current parameter set: {'M': 50, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9093444909344491\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.8786610878661089\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.9483960948396094\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9553695955369595\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.8814504881450489\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.9483960948396094\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8786610878661089\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.9539748953974896\n",
      "Current parameter set: {'M': 50, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9623430962343096\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.9595536959553695\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9553695955369595\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.9637377963737795\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9651324965132496\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.9637377963737795\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 50, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9483960948396094\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.8730822873082288\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.8981868898186889\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9177126917712691\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.8716875871687587\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.8981868898186889\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9191073919107392\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.8702928870292888\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.8981868898186889\n",
      "Current parameter set: {'M': 100, 'max_depth': 2, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9246861924686192\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.9079497907949791\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9567642956764296\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.906555090655509\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.9637377963737795\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9623430962343096\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.9051603905160391\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.9651324965132496\n",
      "Current parameter set: {'M': 100, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9679218967921895\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.1}\n",
      "acc:0.9581589958158996\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 0.5}\n",
      "acc:0.9595536959553695\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 2, 'learning_rate': 1}\n",
      "acc:0.9651324965132496\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.9651324965132496\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.9609483960948396\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.9595536959553695\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.9623430962343096\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.9595536959553695\n",
      "Current parameter set: {'M': 100, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.9567642956764296\n"
     ]
    }
   ],
   "source": [
    "##Find best parameter set (Hyperparameter tuning)##\n",
    "parameters = {'M': [10,50,100], \n",
    "              'max_depth': [2,3,6], \n",
    "              'min_sample': [2,3,5],\n",
    "              'learning_rate':[0.1,0.5,1]}\n",
    "best_params_heart, best_score_heart = grid_search_cv_GBDT(parameters, X_train_heart,y_train_heart, method = \"binary classification\", kfold=3, scoring = \"acc\")"
   ]
  },
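  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`grid_search_cv_GBDT` is defined earlier in the notebook. As a rough illustration only (the real function additionally runs k-fold cross validation for each combination), the sketch below shows how the parameter grid can be enumerated with `itertools.product`, which is imported at the top of the notebook; the enumeration order matches the parameter sets printed in the output above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustration only: enumerate every parameter combination in the grid.\n",
    "# grid_search_cv_GBDT presumably loops over combinations like this\n",
    "# (an assumption; its internals are defined earlier in the notebook).\n",
    "keys = list(parameters.keys())\n",
    "for combo in product(*(parameters[k] for k in keys)):\n",
    "    params = dict(zip(keys, combo))\n",
    "    print(\"Parameter set:\", params)  # e.g. {'M': 10, 'max_depth': 2, ...}"
   ]
  },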
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'M': 100, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 1}\n",
      "0.9679218967921895\n"
     ]
    }
   ],
   "source": [
    "##Print the best parameter set##\n",
    "print(best_params_heart)\n",
    "print(best_score_heart)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9902597402597403\n",
      "F1 score: 0.9902575829486129\n",
      "ROC_AUC: 0.9901315789473684\n",
      "============= Classification Report =================\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.98      1.00      0.99       156\n",
      "           1       1.00      0.98      0.99       152\n",
      "\n",
      "    accuracy                           0.99       308\n",
      "   macro avg       0.99      0.99      0.99       308\n",
      "weighted avg       0.99      0.99      0.99       308\n",
      "\n"
     ]
    }
   ],
   "source": [
    "##Prediction on test set##\n",
    "gb_heart = gradient_boosting(M=100,max_depth = 3,min_sample = 5,learning_rate = 1,method=\"binary classification\")\n",
    "gb_heart.fit(X_train_heart,y_train_heart)\n",
    "\n",
    "# performance evaluation\n",
    "y_pred_heart_test = gb_heart.predict(X_test_heart)\n",
    "print(\"Accuracy: \" + str(accuracy_score(y_test_heart, y_pred_heart_test)))\n",
    "print(\"F1 score: \" + str(f1_score(y_test_heart, y_pred_heart_test, average='weighted')))\n",
    "fpr, tpr, thresholds = roc_curve(y_test_heart, y_pred_heart_test)\n",
    "print(\"ROC_AUC: \" + str(auc(fpr, tpr)))\n",
    "print(\"============= Classification Report =================\")\n",
    "print(classification_report(y_test_heart, y_pred_heart_test))"
   ]
  },
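  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One caveat about the ROC AUC above: `roc_curve` is given hard 0/1 predictions rather than continuous scores, so the curve has only three points and the trapezoidal AUC reduces to the balanced accuracy, (TPR + TNR)/2, rather than a threshold-free AUC. The quick check below (using `recall_score`, not among the original imports) verifies that identity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# With hard 0/1 predictions, the AUC from roc_curve equals the balanced accuracy.\n",
    "from sklearn.metrics import recall_score\n",
    "\n",
    "tpr_hard = recall_score(y_test_heart, y_pred_heart_test)                # sensitivity (TPR)\n",
    "tnr_hard = recall_score(y_test_heart, y_pred_heart_test, pos_label=0)  # specificity (TNR)\n",
    "print(\"Balanced accuracy:\", (tpr_hard + tnr_hard) / 2)  # should match ROC_AUC above"
   ]
  },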
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Credict Approval Dataset (Binary classification)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Credit Approval Dataset comprises financial and personal information of credit applicants, with 690 instances and 15 attributes. One noteworthy characteristic of this dataset is the diversity of its attributes, including continuous variables, nominal variables with small numbers of values, and nominal variables with larger numbers of values. After removing rows with missing values, 653 instances remain, with 296 instances having a positive credit score and 357 with a negative credit score. The number of numerical and categorical attributes is evenly distributed, with 9 categorical variables (A1, A4, A5, A6, A7, A9, A10, A12, A13) and 4 numerical attributes (A2, A3, A8, A11). All attribute names and values have been replaced with meaningless symbols to preserve the confidentiality of the data. We test the performance of our implemented GBDT for handling missing values using this dataset.\n",
    "\n",
    "dataset reference: https://archive.ics.uci.edu/ml/datasets/credit+approval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Size of dataset removing missing value: (690, 16)\n",
      "Size of dataset with missing value: (653, 16)\n"
     ]
    }
   ],
   "source": [
    "# Data preprocessing\n",
    "df_ca = pd.read_csv(\"D:\\\\STATS4T06\\\\Datasets\\\\crx.data\", delimiter=\",\",header=None)\n",
    "df_ca.columns = ['a0', 'a1', 'a2','a3','a4','a5','a6','a7','a8','a9','a10','a11','a12','a13','a14','a15'] # set column names\n",
    "df_ca = df_ca.replace(\"?\", np.nan)  # replace missing value as Nan\n",
    "\n",
    "##-------------------------Dataset with missing value---------------------------------#\n",
    "print(\"Size of dataset removing missing value: \" + str(df_ca.shape)) #print size of the dataframe\n",
    "X_n = df_ca.drop('a15', axis=1)\n",
    "y_n = df_ca['a15'].map({'+': 1, '-': 0})\n",
    "# normalize numerical attributes\n",
    "scaler = MinMaxScaler()\n",
    "num_cols = ['a1','a2','a7','a10','a13','a14']\n",
    "X_n[num_cols] = scaler.fit_transform(X_n[num_cols])\n",
    "# perform one-hot encoding for categorical attributes\n",
    "X_n = pd.get_dummies(X_n, columns=['a0','a3','a4','a5','a6','a8','a9','a11','a12'])\n",
    "# train/test split\n",
    "X_train_ca_n, X_test_ca_n, y_train_ca_n, y_test_ca_n = train_test_split(X_n, y_n, test_size=0.3, random_state=4)\n",
    "\n",
    "##-------------------------Dataset removing missing value------------------------------# \n",
    "df_ca = df_ca.dropna()\n",
    "print(\"Size of dataset with missing value: \" + str(df_ca.shape)) #print size of the dataframe\n",
    "X = df_ca.drop('a15', axis=1)\n",
    "y = df_ca['a15'].map({'+': 1, '-': 0})\n",
    "# normalize numerical attributes\n",
    "scaler = MinMaxScaler()\n",
    "num_cols = ['a1','a2','a7','a10','a13','a14']\n",
    "X[num_cols] = scaler.fit_transform(X[num_cols])\n",
    "# perform one-hot encoding for categorical attributes\n",
    "X = pd.get_dummies(X, columns=['a0','a3','a4','a5','a6','a8','a9','a11','a12'])\n",
    "# train/test split\n",
    "X_train_ca, X_test_ca, y_train_ca, y_test_ca = train_test_split(X, y, test_size=0.3, random_state=4)\n",
    "\n"
   ]
  },
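  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a sanity check on the counts quoted above (690 instances, 653 after dropping rows with missing values), the sketch below re-reads the raw file (assuming the same local path used in the preprocessing cell) and counts the `?` markers per attribute."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: per-attribute missing-value counts in the raw crx.data file.\n",
    "df_check = pd.read_csv(\"D:\\\\STATS4T06\\\\Datasets\\\\crx.data\", delimiter=\",\", header=None)\n",
    "df_check = df_check.replace(\"?\", np.nan)\n",
    "print(df_check.isna().sum())                             # missing count per column\n",
    "print(\"Rows after dropna:\", df_check.dropna().shape[0])  # expected: 653"
   ]
  },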
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first test the cross validation function that hanlding missing value, we can see the result is very good, but it is much slower than ignoring missing value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy and standard deviation: [0.8654639175257731, 0.023461572512046523]\n"
     ]
    }
   ],
   "source": [
    "# 5-fold cross validation using dataset with missing value\n",
    "creditCv = kfoldcrossGBDT(X_train_ca_n,y_train_ca_n,M = 10,max_depth=6,min_sample=5,learning_rate=0.5,method=\"binary classification\",kfold=5, missing_value = True, scoring = \"acc\")\n",
    "print(\"Accuracy and standard deviation: \" + str(creditCv))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy and standard deviation: [0.8512183468705207, 0.03207403445393859]\n"
     ]
    }
   ],
   "source": [
    "# 5-fold cross validation using dataset removing missing value\n",
    "creditCv_no_na = kfoldcrossGBDT(X_train_ca,y_train_ca,M = 10,max_depth=6,min_sample=5,learning_rate=0.5,method=\"binary classification\",kfold=5, missing_value = False, scoring = \"acc\")\n",
    "print(\"Accuracy and standard deviation: \" + str(creditCv_no_na))"
   ]
  },
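  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To quantify the speed difference mentioned above, a minimal timing sketch (re-running the same two cross-validation calls, so expect it to be slow) wraps each call with `time.perf_counter`; the absolute numbers depend on hardware."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "# Time 5-fold CV with missing-value handling enabled\n",
    "t0 = time.perf_counter()\n",
    "kfoldcrossGBDT(X_train_ca_n, y_train_ca_n, M=10, max_depth=6, min_sample=5,\n",
    "               learning_rate=0.5, method=\"binary classification\", kfold=5,\n",
    "               missing_value=True, scoring=\"acc\")\n",
    "t_handle = time.perf_counter() - t0\n",
    "\n",
    "# Time 5-fold CV on the dataset with missing-value rows dropped\n",
    "t0 = time.perf_counter()\n",
    "kfoldcrossGBDT(X_train_ca, y_train_ca, M=10, max_depth=6, min_sample=5,\n",
    "               learning_rate=0.5, method=\"binary classification\", kfold=5,\n",
    "               missing_value=False, scoring=\"acc\")\n",
    "t_drop = time.perf_counter() - t0\n",
    "\n",
    "print(f\"handling missing values: {t_handle:.1f}s; dropping rows: {t_drop:.1f}s\")"
   ]
  },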
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we aim to compare the performance of our approach to handling missing values against the approach of simply ignoring them. To achieve this, we will use the same test set for testing, and different training set. In one case, we will ignore the missing values, while in the other, we will use our approach to handle them. As result, we can see our approach to handling missing value has a much better performance than simply ignore it. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GBDT performance on same test set (trained using dataset with missing value): \n",
      "Accuracy: 0.8826530612244898\n",
      "============= Classification Report =================\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.88      0.92      0.90       109\n",
      "           1       0.89      0.84      0.86        87\n",
      "\n",
      "    accuracy                           0.88       196\n",
      "   macro avg       0.88      0.88      0.88       196\n",
      "weighted avg       0.88      0.88      0.88       196\n",
      "\n"
     ]
    }
   ],
   "source": [
    "##Train GBDT using dataset with missing value##\n",
    "gb_ca = gradient_boosting(M=10,max_depth = 3,min_sample = 5,learning_rate = 1,method=\"binary classification\",missing_value=True)\n",
    "gb_ca.fit(X_train_ca_n,y_train_ca_n)\n",
    "\n",
    "# performance evaluation\n",
    "y_pred_ca_n_test = gb_ca.predict(X_test_ca)\n",
    "print(\"GBDT performance on same test set (trained using dataset with missing value): \")\n",
    "print(\"Accuracy: \" + str(accuracy_score(y_test_ca, y_pred_ca_n_test)))\n",
    "print(\"============= Classification Report =================\")\n",
    "print(classification_report(y_test_ca, y_pred_ca_n_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GBDT performance on same test set (trained using dataset removing missing value) \n",
      "Accuracy: 0.8367346938775511\n",
      "============= Classification Report =================\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.87      0.83      0.85       109\n",
      "           1       0.80      0.85      0.82        87\n",
      "\n",
      "    accuracy                           0.84       196\n",
      "   macro avg       0.83      0.84      0.84       196\n",
      "weighted avg       0.84      0.84      0.84       196\n",
      "\n"
     ]
    }
   ],
   "source": [
    "##Train GBDT using dataset removing missing value##\n",
    "gb_ca = gradient_boosting(M=10,max_depth = 3,min_sample = 5,learning_rate = 1,method=\"binary classification\",missing_value=True)\n",
    "gb_ca.fit(X_train_ca,y_train_ca)\n",
    "\n",
    "# performance evaluation\n",
    "y_pred_ca_test = gb_ca.predict(X_test_ca)\n",
    "print(\"GBDT performance on same test set (trained using dataset removing missing value): \")\n",
    "print(\"Accuracy: \" + str(accuracy_score(y_test_ca, y_pred_ca_test)))\n",
    "print(\"============= Classification Report =================\")\n",
    "print(classification_report(y_test_ca, y_pred_ca_test))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Wine dataset (Multiclass classification)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I used this dataset to test the multiclass classification performance of our implemented GBDT. Our implementation of GBDT achieved comparable results to the GBDT implementation in scikit-learn, but was slower in terms of computation speed.\n",
    "\n",
    "dateset reference: https://archive.ics.uci.edu/ml/datasets/wine+quality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data preprocessing\n",
    "df_wine = pd.read_csv('D:\\\\stats790\\\\winequality-red.csv')\n",
    "\n",
    "X = df_wine.drop('quality', axis=1)\n",
    "y = df_wine['quality']\n",
    "\n",
    "# normalize numerical predictors\n",
    "scaler = StandardScaler()\n",
    "X = pd.DataFrame(scaler.fit_transform(X))\n",
    "\n",
    "# perform train-test split\n",
    "X_train_wine, X_test_wine, y_train_wine, y_test_wine = train_test_split(X, y, test_size=0.3, random_state=4)"
   ]
  },
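  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The quality labels in this dataset are heavily imbalanced (the rare classes 3 and 8 have only a handful of instances, as the classification reports below also show), which explains the low macro-averaged scores later on. A quick check of the class distribution:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class distribution of the wine quality labels in the training split\n",
    "print(y_train_wine.value_counts().sort_index())"
   ]
  },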
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.5817547278303092\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.5924706107845643\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.5504871581906465\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.5790729619217991\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.5790458088423205\n",
      "Current parameter set: {'M': 10, 'max_depth': 3, 'min_sample': 5, 'learning_rate': 1}\n",
      "acc:0.557641196013289\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.1}\n",
      "acc:0.6022984283158701\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 0.5}\n",
      "acc:0.6041017122412471\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 3, 'learning_rate': 1}\n",
      "acc:0.5817738947099411\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.1}\n",
      "acc:0.5996230513672374\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 0.5}\n",
      "acc:0.6014087656529516\n",
      "Current parameter set: {'M': 10, 'max_depth': 6, 'min_sample': 5, 'learning_rate': 1}\n"
     ]
    }
   ],
   "source": [
    "##Find best parameter set (Hyperparameter tuning)##\n",
    "parameters = {'M': [10,50,100], \n",
    "              'max_depth': [3,6], \n",
    "              'min_sample': [3,5],\n",
    "              'learning_rate':[0.1,0.5,1]}\n",
    "best_params_wine, best_score_wine = grid_search_cv_GBDT(parameters, X_train_wine,y_train_wine, method = \"multiclass classification\", kfold=2, scoring = \"acc\")\n",
    "print(best_params_heart)\n",
    "print(best_score_heart)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.6333333333333333\n",
      "============= Classification Report =================\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           3       0.00      0.00      0.00         2\n",
      "           4       0.14      0.07      0.09        15\n",
      "           5       0.70      0.72      0.71       211\n",
      "           6       0.59      0.64      0.62       190\n",
      "           7       0.66      0.49      0.56        59\n",
      "           8       0.00      0.00      0.00         3\n",
      "\n",
      "    accuracy                           0.63       480\n",
      "   macro avg       0.35      0.32      0.33       480\n",
      "weighted avg       0.63      0.63      0.63       480\n",
      "\n"
     ]
    }
   ],
   "source": [
    "##Prediction on test set##\n",
    "gb_wine = gradient_boosting(M=100,max_depth = 3,min_sample = 5,learning_rate = 0.1,method=\"multiclass classification\")\n",
    "gb_wine.fit(X_train_wine,y_train_wine)\n",
    "\n",
    "# performance evaluation\n",
    "y_pred_wine_test = gb_wine.predict(X_test_wine)\n",
    "print(\"Accuracy: \" + str(accuracy_score(y_test_wine, y_pred_wine_test)))\n",
    "print(\"============= Classification Report =================\")\n",
    "print(classification_report(y_test_wine, y_pred_wine_test))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following is the performance of GBDT from scikit learn, we can see the result is similar, but it is faster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.6375\n",
      "============= Classification Report =================\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           3       0.00      0.00      0.00         2\n",
      "           4       0.12      0.07      0.09        15\n",
      "           5       0.71      0.70      0.70       211\n",
      "           6       0.61      0.68      0.64       190\n",
      "           7       0.63      0.44      0.52        59\n",
      "           8       0.50      0.33      0.40         3\n",
      "\n",
      "    accuracy                           0.64       480\n",
      "   macro avg       0.43      0.37      0.39       480\n",
      "weighted avg       0.64      0.64      0.63       480\n",
      "\n"
     ]
    }
   ],
   "source": [
    "gbdt = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,max_depth=5)\n",
    "gbdt.fit(X_train_wine, y_train_wine)\n",
    "y_pred = gbdt.predict(X_test_wine)\n",
    "\n",
    "print(\"Accuracy:\", accuracy_score(y_test_wine, y_pred))\n",
    "print(\"============= Classification Report =================\")\n",
    "print(classification_report(y_test_wine, y_pred))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "7ab9e1c345c6ca72aee2a9dc5c191881083c1e0904cd418eb537d62ed6bc1fcb"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
