diff options
Diffstat (limited to 'src/gallium/auxiliary/draw/draw_llvm.c')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 759 |
1 files changed, 286 insertions, 473 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index e08221eb392..8d9b5309aff 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -43,6 +43,8 @@ #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_pack.h" +#include "gallivm/lp_bld_format.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" @@ -56,40 +58,6 @@ #define DEBUG_STORE 0 -/** - * This function is called by the gallivm "garbage collector" when - * the LLVM global data structures are freed. We must free all LLVM-related - * data. Specifically, all JIT'd shader variants. - */ -static void -draw_llvm_garbage_collect_callback(void *cb_data) -{ - struct draw_llvm *llvm = (struct draw_llvm *) cb_data; - struct draw_context *draw = llvm->draw; - struct draw_llvm_variant_list_item *li; - - /* Ensure prepare will be run and shaders recompiled */ - assert(!draw->suspend_flushing); - draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); - - /* free all shader variants */ - li = first_elem(&llvm->vs_variants_list); - while (!at_end(&llvm->vs_variants_list, li)) { - struct draw_llvm_variant_list_item *next = next_elem(li); - draw_llvm_destroy_variant(li->base); - li = next; - } - - /* Null-out these pointers so they get remade next time they're needed. - * See the accessor functions below. - */ - llvm->context_ptr_type = NULL; - llvm->buffer_ptr_type = NULL; - llvm->vb_ptr_type = NULL; - llvm->vertex_header_ptr_type = NULL; -} - - static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var, boolean elts); @@ -316,56 +284,56 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) * Create LLVM types for various structures. */ static void -create_jit_types(struct draw_llvm *llvm) +create_jit_types(struct draw_llvm_variant *variant) { - struct gallivm_state *gallivm = llvm->gallivm; + struct gallivm_state *gallivm = variant->gallivm; LLVMTypeRef texture_type, context_type, buffer_type, vb_type; texture_type = create_jit_texture_type(gallivm, "texture"); context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context"); - llvm->context_ptr_type = LLVMPointerType(context_type, 0); + variant->context_ptr_type = LLVMPointerType(context_type, 0); buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); - llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); + variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0); vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); - llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); + variant->vb_ptr_type = LLVMPointerType(vb_type, 0); } static LLVMTypeRef -get_context_ptr_type(struct draw_llvm *llvm) +get_context_ptr_type(struct draw_llvm_variant *variant) { - if (!llvm->context_ptr_type) - create_jit_types(llvm); - return llvm->context_ptr_type; + if (!variant->context_ptr_type) + create_jit_types(variant); + return variant->context_ptr_type; } static LLVMTypeRef -get_buffer_ptr_type(struct draw_llvm *llvm) +get_buffer_ptr_type(struct draw_llvm_variant *variant) { - if (!llvm->buffer_ptr_type) - create_jit_types(llvm); - return llvm->buffer_ptr_type; + if (!variant->buffer_ptr_type) + create_jit_types(variant); + return variant->buffer_ptr_type; } static LLVMTypeRef -get_vb_ptr_type(struct draw_llvm *llvm) +get_vb_ptr_type(struct draw_llvm_variant *variant) { - if (!llvm->vb_ptr_type) - create_jit_types(llvm); - return llvm->vb_ptr_type; + if (!variant->vb_ptr_type) + create_jit_types(variant); + return variant->vb_ptr_type; } static LLVMTypeRef -get_vertex_header_ptr_type(struct draw_llvm *llvm) +get_vertex_header_ptr_type(struct draw_llvm_variant *variant) { - if (!llvm->vertex_header_ptr_type) - create_jit_types(llvm); - return llvm->vertex_header_ptr_type; + if (!variant->vertex_header_ptr_type) + create_jit_types(variant); + return variant->vertex_header_ptr_type; } @@ -373,7 +341,7 @@ get_vertex_header_ptr_type(struct draw_llvm *llvm) * Create per-context LLVM info. */ struct draw_llvm * -draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm) +draw_llvm_create(struct draw_context *draw) { struct draw_llvm *llvm; @@ -384,18 +352,10 @@ draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm) lp_build_init(); llvm->draw = draw; - llvm->gallivm = gallivm; - - if (gallivm_debug & GALLIVM_DEBUG_IR) { - LLVMDumpModule(llvm->gallivm->module); - } llvm->nr_variants = 0; make_empty_list(&llvm->vs_variants_list); - gallivm_register_garbage_collector_callback( - draw_llvm_garbage_collect_callback, llvm); - return llvm; } @@ -406,9 +366,6 @@ draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm) void draw_llvm_destroy(struct draw_llvm *llvm) { - gallivm_remove_garbage_collector_callback( - draw_llvm_garbage_collect_callback, llvm); - /* XXX free other draw_llvm data? */ FREE(llvm); } @@ -435,15 +392,27 @@ draw_llvm_create_variant(struct draw_llvm *llvm, variant->llvm = llvm; + variant->gallivm = gallivm_create(); + + create_jit_types(variant); + memcpy(&variant->key, key, shader->variant_key_size); - vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs); + vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs); - llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); + variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); draw_llvm_generate(llvm, variant, FALSE); /* linear */ draw_llvm_generate(llvm, variant, TRUE); /* elts */ + gallivm_compile_module(variant->gallivm); + + variant->jit_func = (draw_jit_vert_func) + gallivm_jit_function(variant->gallivm, variant->function); + + variant->jit_func_elts = (draw_jit_vert_func_elts) + gallivm_jit_function(variant->gallivm, variant->function_elts); + variant->shader = shader; variant->list_item_global.base = variant; variant->list_item_local.base = variant; @@ -455,8 +424,9 @@ draw_llvm_create_variant(struct draw_llvm *llvm, static void -generate_vs(struct draw_llvm *llvm, +generate_vs(struct draw_llvm_variant *variant, LLVMBuilderRef builder, + struct lp_type vs_type, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], const struct lp_bld_tgsi_system_values *system_values, @@ -464,21 +434,11 @@ generate_vs(struct draw_llvm *llvm, struct lp_build_sampler_soa *draw_sampler, boolean clamp_vertex_color) { + struct draw_llvm *llvm = variant->llvm; const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; - struct lp_type vs_type; - LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr); + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(variant->gallivm, context_ptr); struct lp_build_sampler_soa *sampler = 0; - memset(&vs_type, 0, sizeof vs_type); - vs_type.floating = TRUE; /* floating point values */ - vs_type.sign = TRUE; /* values are signed */ - vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ - vs_type.width = 32; /* 32-bit float */ - vs_type.length = 4; /* 4 elements per vector */ -#if 0 - num_vs = 4; /* number of vertices per block */ -#endif - if (gallivm_debug & GALLIVM_DEBUG_IR) { tgsi_dump(tokens, 0); } @@ -486,7 +446,7 @@ generate_vs(struct draw_llvm *llvm, if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) sampler = draw_sampler; - lp_build_tgsi_soa(llvm->gallivm, + lp_build_tgsi_soa(variant->gallivm, tokens, vs_type, NULL /*struct lp_build_mask_context *mask*/, @@ -503,7 +463,7 @@ generate_vs(struct draw_llvm *llvm, unsigned chan, attrib; struct lp_build_context bld; struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info; - lp_build_context_init(&bld, llvm->gallivm, vs_type); + lp_build_context_init(&bld, variant->gallivm, vs_type); for (attrib = 0; attrib < info->num_outputs; ++attrib) { for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { @@ -531,25 +491,6 @@ generate_vs(struct draw_llvm *llvm, } -#if DEBUG_STORE -static void print_vectorf(LLVMBuilderRef builder, - LLVMValueRef vec) -{ - LLVMValueRef val[4]; - val[0] = LLVMBuildExtractElement(builder, vec, - lp_build_const_int32(gallivm, 0), ""); - val[1] = LLVMBuildExtractElement(builder, vec, - lp_build_const_int32(gallivm, 1), ""); - val[2] = LLVMBuildExtractElement(builder, vec, - lp_build_const_int32(gallivm, 2), ""); - val[3] = LLVMBuildExtractElement(builder, vec, - lp_build_const_int32(gallivm, 3), ""); - lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", - val[0], val[1], val[2], val[3]); -} -#endif - - static void generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffers_ptr, @@ -559,6 +500,8 @@ generate_fetch(struct gallivm_state *gallivm, LLVMValueRef index, LLVMValueRef instance_id) { + const struct util_format_description *format_desc = util_format_description(velem->src_format); + LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); LLVMBuilderRef builder = gallivm->builder; LLVMValueRef indices = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), @@ -587,118 +530,47 @@ generate_fetch(struct gallivm_state *gallivm, lp_build_const_int32(gallivm, velem->src_offset), ""); - /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ +/* lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);*/ vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); - *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format); -} - - -static LLVMValueRef -aos_to_soa(struct gallivm_state *gallivm, - LLVMValueRef val0, - LLVMValueRef val1, - LLVMValueRef val2, - LLVMValueRef val3, - LLVMValueRef channel) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ex, res; - - ex = LLVMBuildExtractElement(builder, val0, - channel, ""); - res = LLVMBuildInsertElement(builder, - LLVMConstNull(LLVMTypeOf(val0)), - ex, - lp_build_const_int32(gallivm, 0), - ""); - - ex = LLVMBuildExtractElement(builder, val1, - channel, ""); - res = LLVMBuildInsertElement(builder, - res, ex, - lp_build_const_int32(gallivm, 1), - ""); - - ex = LLVMBuildExtractElement(builder, val2, - channel, ""); - res = LLVMBuildInsertElement(builder, - res, ex, - lp_build_const_int32(gallivm, 2), - ""); - - ex = LLVMBuildExtractElement(builder, val3, - channel, ""); - res = LLVMBuildInsertElement(builder, - res, ex, - lp_build_const_int32(gallivm, 3), - ""); - - return res; + *res = lp_build_fetch_rgba_aos(gallivm, + format_desc, + lp_float32_vec4_type(), + vbuffer_ptr, + zero, zero, zero); } - static void -soa_to_aos(struct gallivm_state *gallivm, - LLVMValueRef soa[TGSI_NUM_CHANNELS], - LLVMValueRef aos[TGSI_NUM_CHANNELS]) +convert_to_soa(struct gallivm_state *gallivm, + LLVMValueRef (*src_aos)[LP_MAX_VECTOR_WIDTH / 32], + LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS], + unsigned num_attribs, const struct lp_type soa_type) { - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef comp; - int i = 0; + unsigned i, j, k; + struct lp_type aos_channel_type = soa_type; debug_assert(TGSI_NUM_CHANNELS == 4); + debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); - aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); - aos[1] = aos[2] = aos[3] = aos[0]; - - for (i = 0; i < TGSI_NUM_CHANNELS; ++i) { - LLVMValueRef channel = lp_build_const_int32(gallivm, i); - - comp = LLVMBuildExtractElement(builder, soa[i], - lp_build_const_int32(gallivm, 0), ""); - aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); - - comp = LLVMBuildExtractElement(builder, soa[i], - lp_build_const_int32(gallivm, 1), ""); - aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); + aos_channel_type.length >>= 1; - comp = LLVMBuildExtractElement(builder, soa[i], - lp_build_const_int32(gallivm, 2), ""); - aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); - - comp = LLVMBuildExtractElement(builder, soa[i], - lp_build_const_int32(gallivm, 3), ""); - aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); + for (i = 0; i < num_attribs; ++i) { + LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; + unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; - } -} + for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { + LLVMValueRef channel[LP_MAX_VECTOR_LENGTH]; + assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); -static void -convert_to_soa(struct gallivm_state *gallivm, - LLVMValueRef (*aos)[TGSI_NUM_CHANNELS], - LLVMValueRef (*soa)[TGSI_NUM_CHANNELS], - int num_attribs) -{ - int i; + for (k = 0; k < pixels_per_channel; ++k) { + channel[k] = src_aos[i][j + TGSI_NUM_CHANNELS * k]; + } - debug_assert(TGSI_NUM_CHANNELS == 4); + aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); + } - for (i = 0; i < num_attribs; ++i) { - LLVMValueRef val0 = aos[i][0]; - LLVMValueRef val1 = aos[i][1]; - LLVMValueRef val2 = aos[i][2]; - LLVMValueRef val3 = aos[i][3]; - - soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3, - lp_build_const_int32(gallivm, 0)); - soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3, - lp_build_const_int32(gallivm, 1)); - soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3, - lp_build_const_int32(gallivm, 2)); - soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3, - lp_build_const_int32(gallivm, 3)); + lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[i]); } } @@ -707,89 +579,34 @@ static void store_aos(struct gallivm_state *gallivm, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value, - LLVMValueRef clipmask, boolean have_clipdist) + LLVMValueRef value) { + LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0); LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr); LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr); LLVMValueRef indices[3]; - LLVMValueRef val; - int vertex_id_pad_edgeflag; indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = index; indices[2] = lp_build_const_int32(gallivm, 0); - /* If this assertion fails, it means we need to update the bit twidding - * code here. See struct vertex_header in draw_private.h. - */ - assert(DRAW_TOTAL_CLIP_PLANES==14); - /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */ - vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES); - if (have_clipdist) - vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1); - val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag); - /* OR with the clipmask */ - val = LLVMBuildOr(builder, val, clipmask, ""); - - /* store vertex header */ - LLVMBuildStore(builder, val, id_ptr); - - #if DEBUG_STORE - lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); -#endif -#if 0 - /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); - print_vectorf(builder, value);*/ - data_ptr = LLVMBuildBitCast(builder, data_ptr, - LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0), - "datavec"); - data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); - - LLVMBuildStore(builder, value, data_ptr); -#else - { - LLVMValueRef x, y, z, w; - LLVMValueRef idx0, idx1, idx2, idx3; - LLVMValueRef gep0, gep1, gep2, gep3; - data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); - - idx0 = lp_build_const_int32(gallivm, 0); - idx1 = lp_build_const_int32(gallivm, 1); - idx2 = lp_build_const_int32(gallivm, 2); - idx3 = lp_build_const_int32(gallivm, 3); - - x = LLVMBuildExtractElement(builder, value, - idx0, ""); - y = LLVMBuildExtractElement(builder, value, - idx1, ""); - z = LLVMBuildExtractElement(builder, value, - idx2, ""); - w = LLVMBuildExtractElement(builder, value, - idx3, ""); - - gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); - gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); - gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); - gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); - - /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", - x, gep0, y, gep1, z, gep2, w, gep3);*/ - LLVMBuildStore(builder, x, gep0); - LLVMBuildStore(builder, y, gep1); - LLVMBuildStore(builder, z, gep2); - LLVMBuildStore(builder, w, gep3); - } + lp_build_printf(gallivm, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); #endif + + data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); + data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, ""); + + /* Unaligned store due to the vertex header */ + lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float)); } static void store_aos_array(struct gallivm_state *gallivm, + struct lp_type soa_type, LLVMValueRef io_ptr, - LLVMValueRef aos[TGSI_NUM_CHANNELS], + LLVMValueRef* aos, int attrib, int num_outputs, LLVMValueRef clipmask, @@ -797,42 +614,49 @@ store_aos_array(struct gallivm_state *gallivm, { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib); - LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); - LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); - LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); - LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); - LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32]; + LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; + int vector_length = soa_type.length; + int i; debug_assert(TGSI_NUM_CHANNELS == 4); - io0_ptr = LLVMBuildGEP(builder, io_ptr, - &ind0, 1, ""); - io1_ptr = LLVMBuildGEP(builder, io_ptr, - &ind1, 1, ""); - io2_ptr = LLVMBuildGEP(builder, io_ptr, - &ind2, 1, ""); - io3_ptr = LLVMBuildGEP(builder, io_ptr, - &ind3, 1, ""); - - clipmask0 = LLVMBuildExtractElement(builder, clipmask, - ind0, ""); - clipmask1 = LLVMBuildExtractElement(builder, clipmask, - ind1, ""); - clipmask2 = LLVMBuildExtractElement(builder, clipmask, - ind2, ""); - clipmask3 = LLVMBuildExtractElement(builder, clipmask, - ind3, ""); + for (i = 0; i < vector_length; i++) { + inds[i] = lp_build_const_int32(gallivm, i); + io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); + } + if (attrib == 0) { + /* store vertex header for each of the n vertices */ + LLVMValueRef val, cliptmp; + int vertex_id_pad_edgeflag; + + /* If this assertion fails, it means we need to update the bit twidding + * code here. See struct vertex_header in draw_private.h. + */ + assert(DRAW_TOTAL_CLIP_PLANES==14); + /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */ + vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES); + if (have_clipdist) + vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1); + val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag); + /* OR with the clipmask */ + cliptmp = LLVMBuildOr(builder, val, clipmask, ""); + for (i = 0; i < vector_length; i++) { + LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]); + val = LLVMBuildExtractElement(builder, cliptmp, inds[i], ""); + LLVMBuildStore(builder, val, id_ptr); #if DEBUG_STORE - lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", - io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); + lp_build_printf(gallivm, "io = %p, index %d\n, clipmask = %x\n", + io_ptrs[i], inds[i], val); #endif - /* store for each of the 4 vertices */ - store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0, have_clipdist); - store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1, have_clipdist); - store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2, have_clipdist); - store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3, have_clipdist); + } + } + + /* store for each of the n vertices */ + for (i = 0; i < vector_length; i++) { + store_aos(gallivm, io_ptrs[i], attr_index, aos[i]); + } } @@ -842,33 +666,53 @@ convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef clipmask, int num_outputs, - int max_vertices, boolean have_clipdist) + struct lp_type soa_type, + boolean have_clipdist) { LLVMBuilderRef builder = gallivm->builder; - unsigned chan, attrib; + unsigned chan, attrib, i; #if DEBUG_STORE - lp_build_printf(builder, " # storing begin\n"); + lp_build_printf(gallivm, " # storing begin\n"); #endif for (attrib = 0; attrib < num_outputs; ++attrib) { - LLVMValueRef soa[4]; - LLVMValueRef aos[4]; + LLVMValueRef soa[TGSI_NUM_CHANNELS]; + LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32]; for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { if (outputs[attrib][chan]) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); - /*lp_build_printf(builder, "output %d : %d ", - LLVMConstInt(LLVMInt32Type(), attrib, 0), - LLVMConstInt(LLVMInt32Type(), chan, 0)); - print_vectorf(builder, out);*/ +#if DEBUG_STORE + lp_build_printf(gallivm, "output %d : %d ", + LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), + attrib, 0), + LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), + chan, 0)); + lp_build_print_value(gallivm, "val = ", out); +#endif soa[chan] = out; } else { soa[chan] = 0; } } - soa_to_aos(gallivm, soa, aos); + + + if (soa_type.length == TGSI_NUM_CHANNELS) { + lp_build_transpose_aos(gallivm, soa_type, soa, aos); + } else { + lp_build_transpose_aos(gallivm, soa_type, soa, soa); + + for (i = 0; i < soa_type.length; ++i) { + aos[i] = lp_build_extract_range(gallivm, + soa[i % TGSI_NUM_CHANNELS], + (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS, + TGSI_NUM_CHANNELS); + } + } + store_aos_array(gallivm, + soa_type, io, aos, attrib, @@ -876,104 +720,71 @@ convert_to_aos(struct gallivm_state *gallivm, clipmask, have_clipdist); } #if DEBUG_STORE - lp_build_printf(builder, " # storing end\n"); + lp_build_printf(gallivm, " # storing end\n"); #endif } /** * Stores original vertex positions in clip coordinates - * There is probably a more efficient way to do this, 4 floats at once - * rather than extracting each element one by one. - * idx is the output to store things too, if pre_clip_pos is set - * we store the pos to the idx, if not we store the clipvertex to it. */ static void store_clip(struct gallivm_state *gallivm, + const struct lp_type vs_type, LLVMValueRef io_ptr, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], boolean pre_clip_pos, int idx) { LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef out[4]; + LLVMValueRef soa[4]; + LLVMValueRef aos[LP_MAX_VECTOR_LENGTH]; LLVMValueRef indices[2]; - LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; - LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; - LLVMValueRef out0elem, out1elem, out2elem, out3elem; - int i; + LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; + LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32]; + LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32]; + int i, j; - LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); - LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); - LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); - LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); - indices[0] = indices[1] = lp_build_const_int32(gallivm, 0); - out[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 x2 x3*/ - out[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 y2 y3*/ - out[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 z2 z3*/ - out[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 w2 w3*/ + for (i = 0; i < vs_type.length; i++) { + inds[i] = lp_build_const_int32(gallivm, i); + io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); + } - io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); - io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); - io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); - io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/ + soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/ + soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/ + soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/ if (!pre_clip_pos) { - clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr); - clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr); - clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr); - clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr); + for (i = 0; i < vs_type.length; i++) { + clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]); + } } else { - clip_ptr0 = draw_jit_header_pre_clip_pos(gallivm, io0_ptr); - clip_ptr1 = draw_jit_header_pre_clip_pos(gallivm, io1_ptr); - clip_ptr2 = draw_jit_header_pre_clip_pos(gallivm, io2_ptr); - clip_ptr3 = draw_jit_header_pre_clip_pos(gallivm, io3_ptr); + for (i = 0; i < vs_type.length; i++) { + clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]); + } } - for (i = 0; i<4; i++) { - clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */ - clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */ - clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */ - clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */ - - out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */ - out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */ - out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */ - out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */ - - LLVMBuildStore(builder, out0elem, clip0_ptr); - LLVMBuildStore(builder, out1elem, clip1_ptr); - LLVMBuildStore(builder, out2elem, clip2_ptr); - LLVMBuildStore(builder, out3elem, clip3_ptr); - - indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + lp_build_transpose_aos(gallivm, vs_type, soa, soa); + for (i = 0; i < vs_type.length; ++i) { + aos[i] = lp_build_extract_range(gallivm, + soa[i % TGSI_NUM_CHANNELS], + (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS, + TGSI_NUM_CHANNELS); } -} - + for (j = 0; j < vs_type.length; j++) { + LLVMTypeRef clip_ptr_type = LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0); + LLVMValueRef clip_ptr; -/** - * Equivalent of _mm_set1_ps(a) - */ -static LLVMValueRef -vec4f_from_scalar(struct gallivm_state *gallivm, - LLVMValueRef a, - const char *name) -{ - LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); - LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4)); - int i; + clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo"); + clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, ""); - for (i = 0; i < 4; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - res = LLVMBuildInsertElement(gallivm->builder, res, a, - index, i == 3 ? name : ""); + /* Unaligned store */ + lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float)); } - - return res; } @@ -981,15 +792,17 @@ vec4f_from_scalar(struct gallivm_state *gallivm, * Transforms the outputs for viewport mapping */ static void -generate_viewport(struct draw_llvm *llvm, +generate_viewport(struct draw_llvm_variant *variant, LLVMBuilderRef builder, + struct lp_type vs_type, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef context_ptr) { int i; - struct gallivm_state *gallivm = llvm->gallivm; - struct lp_type f32_type = lp_type_float_vec(32); - LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + struct gallivm_state *gallivm = variant->gallivm; + struct lp_type f32_type = vs_type; + LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 .. wn*/ LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr); @@ -999,7 +812,7 @@ generate_viewport(struct draw_llvm *llvm, /* Viewport Mapping */ for (i=0; i<3; i++) { - LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 .. xn*/ LLVMValueRef scale; LLVMValueRef trans; LLVMValueRef scale_i; @@ -1012,8 +825,10 @@ generate_viewport(struct draw_llvm *llvm, index = lp_build_const_int32(gallivm, i+4); trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); - scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale"); - trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans"); + scale = lp_build_broadcast(gallivm, vs_type_llvm, + LLVMBuildLoad(builder, scale_i, "scale")); + trans = lp_build_broadcast(gallivm, vs_type_llvm, + LLVMBuildLoad(builder, trans_i, "trans")); /* divide by w */ out = LLVMBuildFMul(builder, out, out3, ""); @@ -1030,10 +845,12 @@ generate_viewport(struct draw_llvm *llvm, /** - * Returns clipmask as 4xi32 bitmask for the 4 vertices + * Returns clipmask as nxi32 bitmask for the n vertices */ static LLVMValueRef generate_clipmask(struct draw_llvm *llvm, + struct gallivm_state *gallivm, + struct lp_type vs_type, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], boolean clip_xy, boolean clip_z, @@ -1043,15 +860,15 @@ generate_clipmask(struct draw_llvm *llvm, LLVMValueRef context_ptr, boolean *have_clipdist) { - struct gallivm_state *gallivm = llvm->gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef mask; /* stores the <nxi32> clipmasks */ LLVMValueRef test, temp; LLVMValueRef zero, shift; LLVMValueRef pos_x, pos_y, pos_z, pos_w; LLVMValueRef cv_x, cv_y, cv_z, cv_w; LLVMValueRef plane1, planes, plane_ptr, sum; - struct lp_type f32_type = lp_type_float_vec(32); + struct lp_type f32_type = vs_type; + struct lp_type i32_type = lp_int_type(vs_type); const unsigned pos = draw_current_shader_position_output(llvm->draw); const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw); int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance; @@ -1064,25 +881,25 @@ generate_clipmask(struct draw_llvm *llvm, if (cd[0] != pos || cd[1] != pos) have_cd = true; - mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); - temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); - zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ - shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1); /* 1 1 1 1 */ + mask = lp_build_const_int_vec(gallivm, i32_type, 0); + temp = lp_build_const_int_vec(gallivm, i32_type, 0); + zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(gallivm, i32_type, 1); /* 1 1 1 1 */ /* * load clipvertex and position from correct locations. * if they are the same just load them once. */ - pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 x2 x3*/ - pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 y2 y3*/ - pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 z2 z3*/ - pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 w2 w3*/ + pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */ + pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */ + pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */ + pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */ if (clip_user && cv != pos) { - cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 x2 x3*/ - cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 y2 y3*/ - cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 z2 z3*/ - cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 w2 w3*/ + cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */ + cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */ + cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */ + cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */ } else { cv_x = pos_x; cv_y = pos_y; @@ -1120,7 +937,7 @@ generate_clipmask(struct draw_llvm *llvm, } if (clip_z) { - temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16); + temp = lp_build_const_int_vec(gallivm, i32_type, 16); if (clip_halfz) { /* plane 5 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); @@ -1163,42 +980,43 @@ generate_clipmask(struct draw_llvm *llvm, clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], ""); } test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist); - temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx); + temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } else { + LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type); indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = lp_build_const_int32(gallivm, plane_idx); indices[2] = lp_build_const_int32(gallivm, 0); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); - planes = vec4f_from_scalar(gallivm, plane1, "plane4_x"); + planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); sum = LLVMBuildFMul(builder, planes, cv_x, ""); indices[2] = lp_build_const_int32(gallivm, 1); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); - planes = vec4f_from_scalar(gallivm, plane1, "plane4_y"); + planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); test = LLVMBuildFMul(builder, planes, cv_y, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); indices[2] = lp_build_const_int32(gallivm, 2); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); - planes = vec4f_from_scalar(gallivm, plane1, "plane4_z"); + planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); test = LLVMBuildFMul(builder, planes, cv_z, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); indices[2] = lp_build_const_int32(gallivm, 3); plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); - planes = vec4f_from_scalar(gallivm, plane1, "plane4_w"); + planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1); test = LLVMBuildFMul(builder, planes, cv_w, ""); sum = LLVMBuildFAdd(builder, sum, test, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum); - temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx); + temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } @@ -1212,23 +1030,28 @@ generate_clipmask(struct draw_llvm *llvm, * Returns boolean if any clipping has occurred * Used zero/non-zero i32 value to represent boolean */ -static void -clipmask_bool(struct gallivm_state *gallivm, - LLVMValueRef clipmask, - LLVMValueRef ret_ptr) +static LLVMValueRef +clipmask_booli32(struct gallivm_state *gallivm, + const struct lp_type vs_type, + LLVMValueRef clipmask_bool_ptr) { LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, ""); + LLVMValueRef ret = LLVMConstNull(int32_type); LLVMValueRef temp; int i; - for (i=0; i<4; i++) { - temp = LLVMBuildExtractElement(builder, clipmask, + /* + * Can do this with log2(vector length) pack instructions and one extract + * (as we don't actually need a or) with sse2 which would be way better. + */ + for (i=0; i < vs_type.length; i++) { + temp = LLVMBuildExtractElement(builder, clipmask_bool, lp_build_const_int32(gallivm, i) , ""); ret = LLVMBuildOr(builder, ret, temp, ""); } - - LLVMBuildStore(builder, ret, ret_ptr); + return ret; } @@ -1236,7 +1059,7 @@ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, boolean elts) { - struct gallivm_state *gallivm = llvm->gallivm; + struct gallivm_state *gallivm = variant->gallivm; LLVMContextRef context = gallivm->context; LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[8]; @@ -1244,6 +1067,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; + struct lp_type vs_type; LLVMValueRef end, start; LLVMValueRef count, fetch_elts, fetch_count; LLVMValueRef stride, step, io_itr; @@ -1255,12 +1079,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; - const int max_vertices = 4; + const int vector_length = lp_native_vector_width / 32; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef fetch_max; - void *code; struct lp_build_sampler_soa *sampler = 0; - LLVMValueRef ret, ret_ptr; + LLVMValueRef ret, clipmask_bool_ptr; const boolean bypass_viewport = variant->key.bypass_viewport; const boolean enable_cliptest = variant->key.clip_xy || variant->key.clip_z || @@ -1273,16 +1096,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, memset(&system_values, 0, sizeof(system_values)); - arg_types[0] = get_context_ptr_type(llvm); /* context */ - arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */ - arg_types[2] = get_buffer_ptr_type(llvm); /* vbuffers */ + arg_types[0] = get_context_ptr_type(variant); /* context */ + arg_types[1] = get_vertex_header_ptr_type(variant); /* vertex_header */ + arg_types[2] = get_buffer_ptr_type(variant); /* vbuffers */ if (elts) arg_types[3] = LLVMPointerType(int32_type, 0);/* fetch_elts * */ else arg_types[3] = int32_type; /* start */ arg_types[4] = int32_type; /* fetch_count / count */ arg_types[5] = int32_type; /* stride */ - arg_types[6] = get_vb_ptr_type(llvm); /* pipe_vertex_buffer's */ + arg_types[6] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */ arg_types[7] = int32_type; /* instance_id */ func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); @@ -1341,9 +1164,16 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, lp_build_context_init(&bld, gallivm, lp_type_int(32)); - /* function will return non-zero i32 value if any clipped vertices */ - ret_ptr = lp_build_alloca(gallivm, int32_type, ""); - LLVMBuildStore(builder, zero, ret_ptr); + memset(&vs_type, 0, sizeof vs_type); + vs_type.floating = TRUE; /* floating point values */ + vs_type.sign = TRUE; /* values are signed */ + vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + vs_type.width = 32; /* 32-bit float */ + vs_type.length = vector_length; + + /* hold temporary "bool" clipmask */ + clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), ""); + LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr); /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( @@ -1358,14 +1188,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, end = lp_build_add(&bld, start, count); } - step = lp_build_const_int32(gallivm, max_vertices); + step = lp_build_const_int32(gallivm, vector_length); fetch_max = LLVMBuildSub(builder, end, one, "fetch_max"); lp_build_loop_begin(&lp_loop, gallivm, start); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; - LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = { { 0 } }; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } }; LLVMValueRef io; LLVMValueRef clipmask; /* holds the clipmask value */ const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS]; @@ -1377,11 +1207,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); #if DEBUG_STORE - lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n", io_itr, io, lp_loop.counter); #endif - system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32)); - for (i = 0; i < TGSI_NUM_CHANNELS; ++i) { + system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length)); + for (i = 0; i < vector_length; ++i) { LLVMValueRef true_index = LLVMBuildAdd(builder, lp_loop.counter, @@ -1413,11 +1243,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, } } convert_to_soa(gallivm, aos_attribs, inputs, - draw->pt.nr_vertex_elements); + draw->pt.nr_vertex_elements, vs_type); ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs; - generate_vs(llvm, + generate_vs(variant, builder, + vs_type, outputs, ptr_aos, &system_values, @@ -1426,29 +1257,34 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, variant->key.clamp_vertex_color); /* store original positions in clip before further manipulation */ - store_clip(gallivm, io, outputs, 0, cv); - store_clip(gallivm, io, outputs, 1, pos); + store_clip(gallivm, vs_type, io, outputs, 0, cv); + store_clip(gallivm, vs_type, io, outputs, 1, pos); /* do cliptest */ if (enable_cliptest) { + LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, ""); /* allocate clipmask, assign it integer type */ - clipmask = generate_clipmask(llvm, outputs, + clipmask = generate_clipmask(llvm, + gallivm, + vs_type, + outputs, variant->key.clip_xy, variant->key.clip_z, variant->key.clip_user, variant->key.clip_halfz, variant->key.ucp_enable, context_ptr, &have_clipdist); - /* return clipping boolean value for function */ - clipmask_bool(gallivm, clipmask, ret_ptr); + temp = LLVMBuildOr(builder, clipmask, temp, ""); + /* store temporary clipping boolean value */ + LLVMBuildStore(builder, temp, clipmask_bool_ptr); } else { - clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0); } /* do viewport mapping */ if (!bypass_viewport) { - generate_viewport(llvm, builder, outputs, context_ptr); + generate_viewport(variant, builder, vs_type, outputs, context_ptr); } /* store clipmask in vertex header, @@ -1456,43 +1292,20 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, * and transformed positions in data */ convert_to_aos(gallivm, io, outputs, clipmask, - vs_info->num_outputs, max_vertices, have_clipdist); + vs_info->num_outputs, vs_type, + have_clipdist); } lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE); sampler->destroy(sampler); - ret = LLVMBuildLoad(builder, ret_ptr, ""); - LLVMBuildRet(builder, ret); - - /* - * Translate the LLVM IR into machine code. - */ -#ifdef DEBUG - if (LLVMVerifyFunction(variant_func, LLVMPrintMessageAction)) { - lp_debug_dump_value(variant_func); - assert(0); - } -#endif - - LLVMRunFunctionPassManager(gallivm->passmgr, variant_func); + /* return clipping boolean value for function */ + ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr); - if (gallivm_debug & GALLIVM_DEBUG_IR) { - lp_debug_dump_value(variant_func); - debug_printf("\n"); - } - - code = LLVMGetPointerToGlobal(gallivm->engine, variant_func); - if (elts) - variant->jit_func_elts = (draw_jit_vert_func_elts) pointer_to_func(code); - else - variant->jit_func = (draw_jit_vert_func) pointer_to_func(code); + LLVMBuildRet(builder, ret); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { - lp_disassemble(code); - } - lp_func_delete_body(variant_func); + gallivm_verify_function(gallivm, variant_func); } @@ -1600,17 +1413,17 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant) struct draw_llvm *llvm = variant->llvm; if (variant->function_elts) { - LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, - variant->function_elts); - LLVMDeleteFunction(variant->function_elts); + gallivm_free_function(variant->gallivm, + variant->function_elts, variant->jit_func_elts); } if (variant->function) { - LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, - variant->function); - LLVMDeleteFunction(variant->function); + gallivm_free_function(variant->gallivm, + variant->function, variant->jit_func); } + gallivm_destroy(variant->gallivm); + remove_from_list(&variant->list_item_local); variant->shader->variants_cached--; remove_from_list(&variant->list_item_global); |