diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-02-04 18:09:51 -0500 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-02-05 14:59:57 -0500 |
commit | 1431bacf516bce797f4711f6dc36d550a8b81fb0 (patch) | |
tree | cf24765168ebd93c4c53aa3358bb6c8aa07d46b0 | |
parent | e4213bf09b5eed9b44dacb254c195e06678af41a (diff) |
clover: WIP: Add support for ELF loading [clover-elf-v2]
We now pass ELF binaries to the driver rather than LLVM IR.
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 87 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute_internal.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_llvm.c | 61 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_llvm.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/Makefile.sources | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_elf_util.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_llvm_util.c | 79 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_llvm_util.h | 39 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 4 | ||||
-rw-r--r-- | src/gallium/state_trackers/clover/llvm/invocation.cpp | 118 |
12 files changed, 236 insertions, 275 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 70efe5c5a87..1885d450370 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -47,9 +47,7 @@ #include "evergreen_compute_internal.h" #include "compute_memory_pool.h" #include "sb/sb_public.h" -#ifdef HAVE_OPENCL -#include "radeon_llvm_util.h" -#endif +#include "radeon_elf_util.h" /** RAT0 is for global binding write @@ -198,36 +196,21 @@ void *evergreen_create_compute_state( { struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); - -#ifdef HAVE_OPENCL + struct radeon_shader_binary binary; const struct pipe_llvm_program_header * header; - const unsigned char * code; - unsigned i; - - shader->llvm_ctx = LLVMContextCreate(); - - COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); + const char * code; + boolean use_kill; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); -#endif + radeon_elf_read(code, header->num_bytes, &binary, true); + r600_create_shader(&shader->active_kernel.bc, &binary, &use_kill); shader->ctx = (struct r600_context*)ctx; shader->local_size = cso->req_local_mem; shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; -#ifdef HAVE_OPENCL - shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code, - header->num_bytes); - shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels); - - for (i = 0; i < shader->num_kernels; i++) { - struct r600_kernel *kernel = &shader->kernels[i]; - kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i, - code, header->num_bytes); - } -#endif return shader; } @@ -238,14 +221,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state) if (!shader) return; - FREE(shader->kernels); - -#ifdef HAVE_OPENCL - if (shader->llvm_ctx){ - 
LLVMContextDispose(shader->llvm_ctx); - } -#endif - FREE(shader); } @@ -347,7 +322,7 @@ static void evergreen_emit_direct_dispatch( unsigned wave_divisor = (16 * num_pipes); int group_size = 1; int grid_size = 1; - unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw; + unsigned lds_size = shader->local_size / 4 + shader->active_kernel.bc.nlds_dw; /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { @@ -519,9 +494,19 @@ void evergreen_emit_cs_shader( struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; - struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; + struct r600_kernel *kernel = &shader->active_kernel; struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; uint64_t va; + void *p; + + if (shader->active_kernel.code_bo) { + pipe_resource_reference((struct pipe_resource**)&shader->active_kernel.code_bo, NULL); + } + shader->active_kernel.code_bo = r600_compute_buffer_alloc_vram(rctx->screen, + shader->active_kernel.bc.ndw * 4); + p = r600_buffer_map_sync_with_rings(&rctx->b, shader->active_kernel.code_bo, PIPE_TRANSFER_WRITE); + memcpy(p, shader->active_kernel.bc.bytecode + (state->kernel_index / 4), shader->active_kernel.bc.ndw * 4); + rctx->b.ws->buffer_unmap(shader->active_kernel.code_bo->cs_buf); va = r600_resource_va(&rctx->screen->b.b, &kernel->code_bo->b.b); @@ -544,44 +529,8 @@ static void evergreen_launch_grid( { struct r600_context *ctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; - struct r600_kernel *kernel = &shader->kernels[pc]; - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); -#ifdef HAVE_OPENCL - - if (!kernel->code_bo) { - void *p; - struct r600_bytecode *bc = &kernel->bc; - LLVMModuleRef mod = kernel->llvm_module; - boolean use_kill = false; - bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0; - unsigned use_sb = ctx->screen->b.debug_flags & 
DBG_SB_CS; - unsigned sb_disasm = use_sb || - (ctx->screen->b.debug_flags & DBG_SB_DISASM); - - r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, - ctx->screen->has_compressed_msaa_texturing); - bc->type = TGSI_PROCESSOR_COMPUTE; - bc->isa = ctx->isa; - r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); - - if (dump && !sb_disasm) { - r600_bytecode_disasm(bc); - } else if ((dump && sb_disasm) || use_sb) { - if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) - R600_ERR("r600_sb_bytecode_process failed!\n"); - } - - kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - kernel->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE); - memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); - ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf); - } -#endif - shader->active_kernel = kernel; ctx->cs_shader_state.kernel_index = pc; evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); compute_emit_cs(ctx, block_layout, grid_layout); diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 0929d8dcf27..44c7fb3d0e3 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -42,7 +42,7 @@ struct r600_pipe_compute { unsigned num_kernels; struct r600_kernel *kernels; - struct r600_kernel *active_kernel; + struct r600_kernel active_kernel; unsigned local_size; unsigned private_size; unsigned input_size; diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 4fcca697741..a6f16f99dc7 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -15,6 +15,7 @@ #include "r600_pipe.h" #include "radeon_llvm.h" #include "radeon_llvm_emit.h" +#include "radeon_elf_util.h" #include <stdio.h> @@ -819,31 +820,22 @@ LLVMModuleRef r600_tgsi_llvm( #define R_028868_SQ_PGM_RESOURCES_VS 0x028868 
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 -unsigned r600_llvm_compile( - LLVMModuleRef mod, - enum radeon_family family, - struct r600_bytecode *bc, - boolean *use_kill, - unsigned dump) -{ - unsigned r; - struct radeon_shader_binary binary; - const char * gpu_family = r600_get_llvm_processor_name(family); - unsigned i; - - memset(&binary, 0, sizeof(struct radeon_shader_binary)); - r = radeon_llvm_compile(mod, &binary, gpu_family, dump); +unsigned r600_create_shader(struct r600_bytecode *bc, + const struct radeon_shader_binary *binary, + boolean *use_kill) - assert(binary.code_size % 4 == 0); - bc->bytecode = CALLOC(1, binary.code_size); - memcpy(bc->bytecode, binary.code, binary.code_size); - bc->ndw = binary.code_size / 4; +{ + int i; + assert(binary->code_size % 4 == 0); + bc->bytecode = CALLOC(1, binary->code_size); + memcpy(bc->bytecode, binary->code, binary->code_size); + bc->ndw = binary->code_size / 4; - for (i = 0; i < binary.config_size; i+= 8) { + for (i = 0; i < binary->config_size; i+= 8) { unsigned reg = - util_le32_to_cpu(*(uint32_t*)(binary.config + i)); + util_le32_to_cpu(*(uint32_t*)(binary->config + i)); unsigned value = - util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); + util_le32_to_cpu(*(uint32_t*)(binary->config + i + 4)); switch (reg) { /* R600 / R700 */ case R_028850_SQ_PGM_RESOURCES_PS: @@ -852,8 +844,8 @@ unsigned r600_llvm_compile( case R_028844_SQ_PGM_RESOURCES_PS: case R_028860_SQ_PGM_RESOURCES_VS: case R_0288D4_SQ_PGM_RESOURCES_LS: - bc->ngpr = G_028844_NUM_GPRS(value); - bc->nstack = G_028844_STACK_SIZE(value); + bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value)); + bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value)); break; case R_02880C_DB_SHADER_CONTROL: *use_kill = G_02880C_KILL_ENABLE(value); @@ -864,10 +856,27 @@ unsigned r600_llvm_compile( } } - FREE(binary.code); - FREE(binary.config); + FREE(binary->code); + FREE(binary->config); + return 0; +} + +unsigned r600_llvm_compile( + LLVMModuleRef mod, + enum 
radeon_family family, + struct r600_bytecode *bc, + boolean *use_kill, + unsigned dump) +{ + unsigned r; + struct radeon_shader_binary binary; + const char * gpu_family = r600_get_llvm_processor_name(family); + unsigned i; + + memset(&binary, 0, sizeof(struct radeon_shader_binary)); + r = radeon_llvm_compile(mod, &binary, gpu_family, dump); - return r; + return r600_create_shader(bc, &binary, use_kill); } #endif diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h index e036bfffac2..2419851c339 100644 --- a/src/gallium/drivers/r600/r600_llvm.h +++ b/src/gallium/drivers/r600/r600_llvm.h @@ -10,6 +10,7 @@ struct r600_bytecode; struct r600_shader_ctx; struct radeon_llvm_context; +struct radeon_shader_binary; enum radeon_family; LLVMModuleRef r600_tgsi_llvm( @@ -23,6 +24,10 @@ unsigned r600_llvm_compile( boolean *use_kill, unsigned dump); +unsigned r600_create_shader(struct r600_bytecode *bc, + const struct radeon_shader_binary *binary, + boolean *use_kill); + #endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */ #endif /* R600_LLVM_H */ diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 93adb655b6a..7b3b2a1e5a7 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -9,5 +9,4 @@ C_SOURCES := \ LLVM_C_FILES := \ radeon_setup_tgsi_llvm.c \ - radeon_llvm_emit.c \ - radeon_llvm_util.c + radeon_llvm_emit.c diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c index 7d929623937..f448ceeda37 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.c +++ b/src/gallium/drivers/radeon/radeon_elf_util.c @@ -77,7 +77,6 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size, } else if (debug && !strcmp(name, ".AMDGPU.disasm")) { binary->disassembled = 1; section_data = elf_getdata(section, section_data); - fprintf(stderr, "\nShader Disassembly:\n\n"); fprintf(stderr, 
"%.*s\n", (int)section_data->d_size, (char *)section_data->d_buf); } diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c deleted file mode 100644 index fe7f9a62ae2..00000000000 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2012, 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: Tom Stellard <thomas.stellard@amd.com> - * - */ - -#include "radeon_llvm_util.h" -#include "util/u_memory.h" - -#include <llvm-c/BitReader.h> -#include <llvm-c/Core.h> -#include <llvm-c/Target.h> -#include <llvm-c/Transforms/IPO.h> -#include <llvm-c/Transforms/PassManagerBuilder.h> - -LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, - const unsigned char * bitcode, unsigned bitcode_len) -{ - LLVMMemoryBufferRef buf; - LLVMModuleRef module; - - buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode, - bitcode_len, "radeon"); - LLVMParseBitcodeInContext(ctx, buf, &module, NULL); - LLVMDisposeMemoryBuffer(buf); - return module; -} - -unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, - const unsigned char *bitcode, unsigned bitcode_len) -{ - LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len); - return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels"); -} - -LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, - const unsigned char *bitcode, unsigned bitcode_len) -{ - LLVMModuleRef mod; - unsigned num_kernels; - LLVMValueRef *kernel_metadata; - unsigned i; - - mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len); - num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels"); - kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef)); - LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata); - for (i = 0; i < num_kernels; i++) { - LLVMValueRef kernel_signature, kernel_function; - if (i == index) { - continue; - } - kernel_signature = kernel_metadata[i]; - LLVMGetMDNodeOperands(kernel_signature, &kernel_function); - LLVMDeleteFunction(kernel_function); - } - FREE(kernel_metadata); - return mod; -} diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.h b/src/gallium/drivers/radeon/radeon_llvm_util.h deleted file mode 100644 index 733c329e99e..00000000000 --- a/src/gallium/drivers/radeon/radeon_llvm_util.h +++ /dev/null @@ -1,39 +0,0 @@ 
-/* - * Copyright 2012, 2013 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: Tom Stellard <thomas.stellard@amd.com> - * - */ - -#ifndef RADEON_LLVM_UTIL_H -#define RADEON_LLVM_UTIL_H - -#include <llvm-c/Core.h> - -LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, - const unsigned char * bitcode, unsigned bitcode_len); -unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, - const unsigned char *bitcode, unsigned bitcode_len); -LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, - const unsigned char *bitcode, unsigned bitcode_len); - -#endif diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a7f49e790af..c04f5497b9d 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -23,14 +23,14 @@ */ #include "util/u_memory.h" +#include "../radeon/r600_pipe_common.h" +#include "../radeon/radeon_elf_util.h" #include "../radeon/r600_cs.h" #include "si_pipe.h" #include "si_shader.h" #include "sid.h" -#include "radeon_llvm_util.h" - #define MAX_GLOBAL_BUFFERS 20 struct si_pipe_compute { @@ -39,13 +39,11 @@ struct si_pipe_compute { unsigned local_size; unsigned private_size; unsigned input_size; - unsigned num_kernels; - struct si_pipe_shader *kernels; + struct radeon_shader_binary binary; + struct si_pipe_shader program; unsigned num_user_sgprs; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; - - LLVMContextRef llvm_ctx; }; static void *si_create_compute_state( @@ -56,10 +54,7 @@ static void *si_create_compute_state( struct si_pipe_compute *program = CALLOC_STRUCT(si_pipe_compute); const struct pipe_llvm_program_header *header; - const unsigned char *code; - unsigned i; - - program->llvm_ctx = LLVMContextCreate(); + const char *code; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); @@ -69,16 +64,9 @@ static void *si_create_compute_state( program->private_size = cso->req_private_mem; program->input_size = cso->req_input_mem; - program->num_kernels = 
radeon_llvm_get_num_kernels(program->llvm_ctx, code, - header->num_bytes); - program->kernels = CALLOC(sizeof(struct si_pipe_shader), - program->num_kernels); - for (i = 0; i < program->num_kernels; i++) { - LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, - code, header->num_bytes); - si_compile_llvm(sctx, &program->kernels[i], mod); - LLVMDisposeModule(mod); - } + memset(&program->binary, 0, sizeof(program->binary)); + radeon_elf_read(code, header->num_bytes, &program->binary, true); + si_create_shader(sctx, &program->program, &program->binary); return program; } @@ -130,8 +118,9 @@ static void si_launch_grid( uint64_t shader_va; unsigned arg_user_sgpr_count = 2; unsigned i; - struct si_pipe_shader *shader = &program->kernels[pc]; + struct si_pipe_shader *shader = &program->program; unsigned lds_blocks; + uint32_t *ptr; pm4->compute_pkt = true; si_cmd_context_control(pm4); @@ -204,6 +193,23 @@ static void si_launch_grid( 0x190 /* Default value */); } + /* copy new shader */ + r600_resource_reference(&shader->bo, NULL); + shader->bo = si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_IMMUTABLE, + program->binary.code_size); + + ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + if (0 /*SI_BIG_ENDIAN*/) { + for (i = 0; i < program->binary.code_size / 4; ++i) { + ptr[i] = util_bswap32(*(uint32_t*)(program->binary.code + pc + i*4)); + } + } else { + memcpy(ptr, program->binary.code + pc, program->binary.code_size); + } + sctx->b.ws->buffer_unmap(shader->bo->cs_buf); + + fprintf(stderr, "pc = %u\n", pc); + shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); @@ -296,14 +302,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){ return; } - if (program->kernels) { - FREE(program->kernels); - } - - if (program->llvm_ctx){ - 
LLVMContextDispose(program->llvm_ctx); - } - //And then free the program itself. FREE(program); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ae40f4e7413..6abd42b64d1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2274,45 +2274,41 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx) } } -int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, - LLVMModuleRef mod) +int si_create_shader(struct si_context *sctx, struct si_pipe_shader *shader, + const struct radeon_shader_binary *binary) { + unsigned i; - uint32_t *ptr; - struct radeon_shader_binary binary; - bool dump = r600_can_dump_shader(&sctx->screen->b, - shader->selector ? shader->selector->tokens : NULL); - memset(&binary, 0, sizeof(binary)); - radeon_llvm_compile(mod, &binary, - r600_get_llvm_processor_name(sctx->screen->b.family), dump); - if (dump && ! binary.disassembled) { + bool dump = false; + + if (dump && ! binary->disassembled) { fprintf(stderr, "SI CODE:\n"); - for (i = 0; i < binary.code_size; i+=4 ) { - fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3], - binary.code[i + 2], binary.code[i + 1], - binary.code[i]); + for (i = 0; i < binary->code_size; i+=4 ) { + fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], + binary->code[i + 2], binary->code[i + 1], + binary->code[i]); } } /* XXX: We may be able to emit some of these values directly rather than * extracting fields to be emitted later. 
*/ - for (i = 0; i < binary.config_size; i+= 8) { - unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i)); - unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); + for (i = 0; i < binary->config_size; i+= 8) { + unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary->config + i)); + unsigned value = util_le32_to_cpu(*(uint32_t*)(binary->config + i + 4)); switch (reg) { case R_00B028_SPI_SHADER_PGM_RSRC1_PS: case R_00B128_SPI_SHADER_PGM_RSRC1_VS: case R_00B228_SPI_SHADER_PGM_RSRC1_GS: case R_00B848_COMPUTE_PGM_RSRC1: - shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8; - shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4; + shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8); + shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4); break; case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: - shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value); + shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value)); break; case R_00B84C_COMPUTE_PGM_RSRC2: - shader->lds_size = G_00B84C_LDS_SIZE(value); + shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value)); break; case R_0286CC_SPI_PS_INPUT_ENA: shader->spi_ps_input_ena = value; @@ -2324,6 +2320,24 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } } + + return 0; +} + +int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, + LLVMModuleRef mod) +{ + int i; + struct radeon_shader_binary binary; + bool dump = r600_can_dump_shader(&sctx->screen->b, + shader->selector ? 
shader->selector->tokens : NULL); + uint32_t *ptr; + memset(&binary, 0, sizeof(binary)); + radeon_llvm_compile(mod, &binary, + r600_get_llvm_processor_name(sctx->screen->b.family), dump); + + si_create_shader(sctx, shader, &binary); + /* copy new shader */ r600_resource_reference(&shader->bo, NULL); shader->bo = si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_IMMUTABLE, diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d667baf402d..fd2b57f9cff 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -31,6 +31,8 @@ #include <llvm-c/Core.h> /* LLVMModuleRef */ +struct radeon_shader_binary; + #define SI_SGPR_CONST 0 #define SI_SGPR_SAMPLER 2 #define SI_SGPR_RESOURCE 4 @@ -198,6 +200,8 @@ int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shade int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, LLVMModuleRef mod); +int si_create_shader(struct si_context *sctx, struct si_pipe_shader *shader, + const struct radeon_shader_binary *binary); void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader); #endif diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 0608cf94827..671cc330210 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -41,6 +41,7 @@ #include <llvm/Support/SourceMgr.h> #include <llvm/IRReader/IRReader.h> #endif +#include <llvm/ADT/OwningPtr.h> #include <llvm/PassManager.h> #include <llvm/Support/CodeGen.h> #include <llvm/Support/TargetSelect.h> @@ -48,6 +49,8 @@ #if HAVE_LLVM < 0x0303 #include <llvm/Support/PathV1.h> #endif +#include <llvm/Support/FormattedStream.h> +#include <llvm/Support/TargetRegistry.h> #include <llvm/Transforms/IPO.h> 
#include <llvm/Transforms/IPO/PassManagerBuilder.h> @@ -58,6 +61,8 @@ #else #include <llvm/IR/DataLayout.h> #endif +#include <llvm/Target/TargetMachine.h> +#include <llvm/Target/TargetOptions.h> #include "pipe/p_state.h" #include "util/u_memory.h" @@ -67,6 +72,8 @@ #include <fstream> #include <cstdio> #include <sstream> +#include <libelf.h> +#include <gelf.h> using namespace clover; @@ -297,17 +304,109 @@ namespace { module build_module_llvm(llvm::Module *mod, const std::vector<llvm::Function *> &kernels, - clang::LangAS::Map& address_spaces) { + clang::LangAS::Map& address_spaces, + std::string triple, std::string processor) { + std::string log; + const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple, log); + if (!target) + throw build_error(log); + + llvm::TargetOptions options; + llvm::OwningPtr<llvm::TargetMachine> tm(target->createTargetMachine(triple, processor, "", options)); + if (!tm.get()) + throw build_error("Target not found: " + triple); + + llvm::PassManager pm; + pm.add(new llvm::DataLayout(mod)); + std::string object_file; + llvm::raw_string_ostream os(object_file); + llvm::formatted_raw_ostream fos(os); + tm->addPassesToEmitFile(pm, fos, llvm::TargetMachine::CGFT_ObjectFile); + pm.run(*mod); + os.flush(); + fos.flush(); + + // One of the libelf implementations + // (http://www.mr511.de/software/english.htm) requires calling + // elf_version() before elf_memory(). 
+ // + elf_version(EV_CURRENT); + char *elf_buffer = (char*)MALLOC(object_file.size()); + memcpy(elf_buffer, &object_file[0], object_file.size()); + Elf *elf = elf_memory(elf_buffer, object_file.size()); + size_t section_str_index; + elf_getshdrstrndx(elf, &section_str_index); + Elf_Scn *section = NULL; + Elf_Scn *symbol_table = NULL; + Elf_Scn *string_table = NULL; + + // Find the symbol table + while ((section = elf_nextscn(elf, section))) { + const char *name; + GElf_Shdr section_header; + if (gelf_getshdr(section, &section_header) != &section_header) { + throw build_error("Failed to read ELF section header\n"); + } + name = elf_strptr(elf, section_str_index, section_header.sh_name); + if (!strcmp(name, ".symtab")) { + symbol_table = section; + string_table = elf_getscn(elf, section_header.sh_link); + if (!string_table) + throw build_error(elf_errmsg(-1)); + assert(string_table); + break; + } + } + if (!symbol_table) { + throw build_error("Unable to find symbol table."); + } + + // Extract symbol information from the table + + Elf_Data *symbol_table_data; + Elf_Data *string_table_data; + GElf_Sym *symbol; + GElf_Sym s; + + symbol_table_data = elf_getdata(symbol_table, NULL); + string_table_data = elf_getdata(string_table, NULL); + if (!string_table_data) + throw build_error(elf_errmsg(-1)); + + assert(string_table_data); + char *names = (char*)MALLOC(string_table_data->d_size); + memcpy(names, string_table_data->d_buf, string_table_data->d_size); + int i = 0; + assert(symbol_table_data); + + std::map<std::string, unsigned> kernel_offsets; + + // Determine the offsets for each kernel + while ((symbol = gelf_getsym(symbol_table_data, i++, &s))) { + char *symbol_name_start = names + symbol->st_name; + for (std::vector<llvm::Function*>::const_iterator i = kernels.begin(), + e = kernels.end(); i != e; ++i) { + llvm::Function *f = *i; + if (f->getName() == std::string(symbol_name_start)) + kernel_offsets[f->getName()] = symbol->st_value; + } + } + FREE(names); + // Begin building
the clover module module m; struct pipe_llvm_program_header header; - llvm::SmallVector<char, 1024> llvm_bitcode; - llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode); - llvm::BitstreamWriter writer(llvm_bitcode); - llvm::WriteBitcodeToFile(mod, bitcode_ostream); - bitcode_ostream.flush(); + // Store the generated ELF binary in the module's text section. + header.num_bytes = object_file.size(); + std::string data; + data.insert(0, (char*)(&header), sizeof(header)); + data.insert(data.end(), object_file.begin(), + object_file.end()); + m.secs.push_back(module::section(0, module::section::text, + header.num_bytes, data)); + // Extract the argument information from the kernel for (unsigned i = 0; i < kernels.size(); ++i) { llvm::Function *kernel_func; std::string kernel_name; @@ -375,9 +474,10 @@ namespace { } } - m.syms.push_back(module::symbol(kernel_name, 0, i, args )); + m.syms.push_back(module::symbol(kernel_name, 0, kernel_offsets[kernel_name], args )); } +#if 0 header.num_bytes = llvm_bitcode.size(); std::string data; data.insert(0, (char*)(&header), sizeof(header)); @@ -385,6 +485,8 @@ namespace { llvm_bitcode.end()); m.secs.push_back(module::section(0, module::section::text, header.num_bytes, data)); +#endif + return m; } @@ -420,6 +522,6 @@ clover::compile_program_llvm(const compat::string &source, assert(0); return module(); default: - return build_module_llvm(mod, kernels, address_spaces); + return build_module_llvm(mod, kernels, address_spaces, triple, processor); } } |