summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2014-02-04 18:09:51 -0500
committer: Tom Stellard <thomas.stellard@amd.com> 2014-02-05 14:59:57 -0500
commit: 1431bacf516bce797f4711f6dc36d550a8b81fb0 (patch)
tree: cf24765168ebd93c4c53aa3358bb6c8aa07d46b0
parent: e4213bf09b5eed9b44dacb254c195e06678af41a (diff)
clover: WIP: Add support for ELF loading (clover-elf-v2)
We now pass ELF binaries to the driver rather than LLVM IR.
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c 87
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute_internal.h 2
-rw-r--r-- src/gallium/drivers/r600/r600_llvm.c 61
-rw-r--r-- src/gallium/drivers/r600/r600_llvm.h 5
-rw-r--r-- src/gallium/drivers/radeon/Makefile.sources 3
-rw-r--r-- src/gallium/drivers/radeon/radeon_elf_util.c 1
-rw-r--r-- src/gallium/drivers/radeon/radeon_llvm_util.c 79
-rw-r--r-- src/gallium/drivers/radeon/radeon_llvm_util.h 39
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c 56
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 56
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 4
-rw-r--r-- src/gallium/state_trackers/clover/llvm/invocation.cpp 118
12 files changed, 236 insertions, 275 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 70efe5c5a87..1885d450370 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -47,9 +47,7 @@
#include "evergreen_compute_internal.h"
#include "compute_memory_pool.h"
#include "sb/sb_public.h"
-#ifdef HAVE_OPENCL
-#include "radeon_llvm_util.h"
-#endif
+#include "radeon_elf_util.h"
/**
RAT0 is for global binding write
@@ -198,36 +196,21 @@ void *evergreen_create_compute_state(
{
struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
-
-#ifdef HAVE_OPENCL
+ struct radeon_shader_binary binary;
const struct pipe_llvm_program_header * header;
- const unsigned char * code;
- unsigned i;
-
- shader->llvm_ctx = LLVMContextCreate();
-
- COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
+ const char * code;
+ boolean use_kill;
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
-#endif
+ radeon_elf_read(code, header->num_bytes, &binary, true);
+ r600_create_shader(&shader->active_kernel.bc, &binary, &use_kill);
shader->ctx = (struct r600_context*)ctx;
shader->local_size = cso->req_local_mem;
shader->private_size = cso->req_private_mem;
shader->input_size = cso->req_input_mem;
-#ifdef HAVE_OPENCL
- shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
- header->num_bytes);
- shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
-
- for (i = 0; i < shader->num_kernels; i++) {
- struct r600_kernel *kernel = &shader->kernels[i];
- kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
- code, header->num_bytes);
- }
-#endif
return shader;
}
@@ -238,14 +221,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
if (!shader)
return;
- FREE(shader->kernels);
-
-#ifdef HAVE_OPENCL
- if (shader->llvm_ctx){
- LLVMContextDispose(shader->llvm_ctx);
- }
-#endif
-
FREE(shader);
}
@@ -347,7 +322,7 @@ static void evergreen_emit_direct_dispatch(
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
- unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw;
+ unsigned lds_size = shader->local_size / 4 + shader->active_kernel.bc.nlds_dw;
/* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
@@ -519,9 +494,19 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
- struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
+ struct r600_kernel *kernel = &shader->active_kernel;
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint64_t va;
+ void *p;
+
+ if (shader->active_kernel.code_bo) {
+ pipe_resource_reference((struct pipe_resource**)&shader->active_kernel.code_bo, NULL);
+ }
+ shader->active_kernel.code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
+ shader->active_kernel.bc.ndw * 4);
+ p = r600_buffer_map_sync_with_rings(&rctx->b, shader->active_kernel.code_bo, PIPE_TRANSFER_WRITE);
+ memcpy(p, shader->active_kernel.bc.bytecode + (state->kernel_index / 4), shader->active_kernel.bc.ndw * 4);
+ rctx->b.ws->buffer_unmap(shader->active_kernel.code_bo->cs_buf);
va = r600_resource_va(&rctx->screen->b.b, &kernel->code_bo->b.b);
@@ -544,44 +529,8 @@ static void evergreen_launch_grid(
{
struct r600_context *ctx = (struct r600_context *)ctx_;
- struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
- struct r600_kernel *kernel = &shader->kernels[pc];
-
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
-#ifdef HAVE_OPENCL
-
- if (!kernel->code_bo) {
- void *p;
- struct r600_bytecode *bc = &kernel->bc;
- LLVMModuleRef mod = kernel->llvm_module;
- boolean use_kill = false;
- bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
- unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
- unsigned sb_disasm = use_sb ||
- (ctx->screen->b.debug_flags & DBG_SB_DISASM);
-
- r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
- ctx->screen->has_compressed_msaa_texturing);
- bc->type = TGSI_PROCESSOR_COMPUTE;
- bc->isa = ctx->isa;
- r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
-
- if (dump && !sb_disasm) {
- r600_bytecode_disasm(bc);
- } else if ((dump && sb_disasm) || use_sb) {
- if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
- R600_ERR("r600_sb_bytecode_process failed!\n");
- }
-
- kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
- kernel->bc.ndw * 4);
- p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
- memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
- ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
- }
-#endif
- shader->active_kernel = kernel;
ctx->cs_shader_state.kernel_index = pc;
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
compute_emit_cs(ctx, block_layout, grid_layout);
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index 0929d8dcf27..44c7fb3d0e3 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -42,7 +42,7 @@ struct r600_pipe_compute {
unsigned num_kernels;
struct r600_kernel *kernels;
- struct r600_kernel *active_kernel;
+ struct r600_kernel active_kernel;
unsigned local_size;
unsigned private_size;
unsigned input_size;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 4fcca697741..a6f16f99dc7 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -15,6 +15,7 @@
#include "r600_pipe.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
+#include "radeon_elf_util.h"
#include <stdio.h>
@@ -819,31 +820,22 @@ LLVMModuleRef r600_tgsi_llvm(
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump)
-{
- unsigned r;
- struct radeon_shader_binary binary;
- const char * gpu_family = r600_get_llvm_processor_name(family);
- unsigned i;
-
- memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
+unsigned r600_create_shader(struct r600_bytecode *bc,
+ const struct radeon_shader_binary *binary,
+ boolean *use_kill)
- assert(binary.code_size % 4 == 0);
- bc->bytecode = CALLOC(1, binary.code_size);
- memcpy(bc->bytecode, binary.code, binary.code_size);
- bc->ndw = binary.code_size / 4;
+{
+ int i;
+ assert(binary->code_size % 4 == 0);
+ bc->bytecode = CALLOC(1, binary->code_size);
+ memcpy(bc->bytecode, binary->code, binary->code_size);
+ bc->ndw = binary->code_size / 4;
- for (i = 0; i < binary.config_size; i+= 8) {
+ for (i = 0; i < binary->config_size; i+= 8) {
unsigned reg =
- util_le32_to_cpu(*(uint32_t*)(binary.config + i));
+ util_le32_to_cpu(*(uint32_t*)(binary->config + i));
unsigned value =
- util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+ util_le32_to_cpu(*(uint32_t*)(binary->config + i + 4));
switch (reg) {
/* R600 / R700 */
case R_028850_SQ_PGM_RESOURCES_PS:
@@ -852,8 +844,8 @@ unsigned r600_llvm_compile(
case R_028844_SQ_PGM_RESOURCES_PS:
case R_028860_SQ_PGM_RESOURCES_VS:
case R_0288D4_SQ_PGM_RESOURCES_LS:
- bc->ngpr = G_028844_NUM_GPRS(value);
- bc->nstack = G_028844_STACK_SIZE(value);
+ bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
+ bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
break;
case R_02880C_DB_SHADER_CONTROL:
*use_kill = G_02880C_KILL_ENABLE(value);
@@ -864,10 +856,27 @@ unsigned r600_llvm_compile(
}
}
- FREE(binary.code);
- FREE(binary.config);
+ FREE(binary->code);
+ FREE(binary->config);
+ return 0;
+}
+
+unsigned r600_llvm_compile(
+ LLVMModuleRef mod,
+ enum radeon_family family,
+ struct r600_bytecode *bc,
+ boolean *use_kill,
+ unsigned dump)
+{
+ unsigned r;
+ struct radeon_shader_binary binary;
+ const char * gpu_family = r600_get_llvm_processor_name(family);
+ unsigned i;
+
+ memset(&binary, 0, sizeof(struct radeon_shader_binary));
+ r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
- return r;
+ return r600_create_shader(bc, &binary, use_kill);
}
#endif
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index e036bfffac2..2419851c339 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -10,6 +10,7 @@
struct r600_bytecode;
struct r600_shader_ctx;
struct radeon_llvm_context;
+struct radeon_shader_binary;
enum radeon_family;
LLVMModuleRef r600_tgsi_llvm(
@@ -23,6 +24,10 @@ unsigned r600_llvm_compile(
boolean *use_kill,
unsigned dump);
+unsigned r600_create_shader(struct r600_bytecode *bc,
+ const struct radeon_shader_binary *binary,
+ boolean *use_kill);
+
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index 93adb655b6a..7b3b2a1e5a7 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -9,5 +9,4 @@ C_SOURCES := \
LLVM_C_FILES := \
radeon_setup_tgsi_llvm.c \
- radeon_llvm_emit.c \
- radeon_llvm_util.c
+ radeon_llvm_emit.c
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c
index 7d929623937..f448ceeda37 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -77,7 +77,6 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
} else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
binary->disassembled = 1;
section_data = elf_getdata(section, section_data);
- fprintf(stderr, "\nShader Disassembly:\n\n");
fprintf(stderr, "%.*s\n", (int)section_data->d_size,
(char *)section_data->d_buf);
}
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c
deleted file mode 100644
index fe7f9a62ae2..00000000000
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2012, 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Tom Stellard <thomas.stellard@amd.com>
- *
- */
-
-#include "radeon_llvm_util.h"
-#include "util/u_memory.h"
-
-#include <llvm-c/BitReader.h>
-#include <llvm-c/Core.h>
-#include <llvm-c/Target.h>
-#include <llvm-c/Transforms/IPO.h>
-#include <llvm-c/Transforms/PassManagerBuilder.h>
-
-LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
- const unsigned char * bitcode, unsigned bitcode_len)
-{
- LLVMMemoryBufferRef buf;
- LLVMModuleRef module;
-
- buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
- bitcode_len, "radeon");
- LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
- LLVMDisposeMemoryBuffer(buf);
- return module;
-}
-
-unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
- const unsigned char *bitcode, unsigned bitcode_len)
-{
- LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
- return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
-}
-
-LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
- const unsigned char *bitcode, unsigned bitcode_len)
-{
- LLVMModuleRef mod;
- unsigned num_kernels;
- LLVMValueRef *kernel_metadata;
- unsigned i;
-
- mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
- num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
- kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
- LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
- for (i = 0; i < num_kernels; i++) {
- LLVMValueRef kernel_signature, kernel_function;
- if (i == index) {
- continue;
- }
- kernel_signature = kernel_metadata[i];
- LLVMGetMDNodeOperands(kernel_signature, &kernel_function);
- LLVMDeleteFunction(kernel_function);
- }
- FREE(kernel_metadata);
- return mod;
-}
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.h b/src/gallium/drivers/radeon/radeon_llvm_util.h
deleted file mode 100644
index 733c329e99e..00000000000
--- a/src/gallium/drivers/radeon/radeon_llvm_util.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2012, 2013 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors: Tom Stellard <thomas.stellard@amd.com>
- *
- */
-
-#ifndef RADEON_LLVM_UTIL_H
-#define RADEON_LLVM_UTIL_H
-
-#include <llvm-c/Core.h>
-
-LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
- const unsigned char * bitcode, unsigned bitcode_len);
-unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
- const unsigned char *bitcode, unsigned bitcode_len);
-LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
- const unsigned char *bitcode, unsigned bitcode_len);
-
-#endif
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index a7f49e790af..c04f5497b9d 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -23,14 +23,14 @@
*/
#include "util/u_memory.h"
+#include "../radeon/r600_pipe_common.h"
+#include "../radeon/radeon_elf_util.h"
#include "../radeon/r600_cs.h"
#include "si_pipe.h"
#include "si_shader.h"
#include "sid.h"
-#include "radeon_llvm_util.h"
-
#define MAX_GLOBAL_BUFFERS 20
struct si_pipe_compute {
@@ -39,13 +39,11 @@ struct si_pipe_compute {
unsigned local_size;
unsigned private_size;
unsigned input_size;
- unsigned num_kernels;
- struct si_pipe_shader *kernels;
+ struct radeon_shader_binary binary;
+ struct si_pipe_shader program;
unsigned num_user_sgprs;
struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
-
- LLVMContextRef llvm_ctx;
};
static void *si_create_compute_state(
@@ -56,10 +54,7 @@ static void *si_create_compute_state(
struct si_pipe_compute *program =
CALLOC_STRUCT(si_pipe_compute);
const struct pipe_llvm_program_header *header;
- const unsigned char *code;
- unsigned i;
-
- program->llvm_ctx = LLVMContextCreate();
+ const char *code;
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
@@ -69,16 +64,9 @@ static void *si_create_compute_state(
program->private_size = cso->req_private_mem;
program->input_size = cso->req_input_mem;
- program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
- header->num_bytes);
- program->kernels = CALLOC(sizeof(struct si_pipe_shader),
- program->num_kernels);
- for (i = 0; i < program->num_kernels; i++) {
- LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
- code, header->num_bytes);
- si_compile_llvm(sctx, &program->kernels[i], mod);
- LLVMDisposeModule(mod);
- }
+ memset(&program->binary, 0, sizeof(program->binary));
+ radeon_elf_read(code, header->num_bytes, &program->binary, true);
+ si_create_shader(sctx, &program->program, &program->binary);
return program;
}
@@ -130,8 +118,9 @@ static void si_launch_grid(
uint64_t shader_va;
unsigned arg_user_sgpr_count = 2;
unsigned i;
- struct si_pipe_shader *shader = &program->kernels[pc];
+ struct si_pipe_shader *shader = &program->program;
unsigned lds_blocks;
+ uint32_t *ptr;
pm4->compute_pkt = true;
si_cmd_context_control(pm4);
@@ -204,6 +193,23 @@ static void si_launch_grid(
0x190 /* Default value */);
}
+ /* copy new shader */
+ r600_resource_reference(&shader->bo, NULL);
+ shader->bo = si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_IMMUTABLE,
+ program->binary.code_size);
+
+ ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+ if (0 /*SI_BIG_ENDIAN*/) {
+ for (i = 0; i < program->binary.code_size / 4; ++i) {
+ ptr[i] = util_bswap32(*(uint32_t*)(program->binary.code + pc + i*4));
+ }
+ } else {
+ memcpy(ptr, program->binary.code + pc, program->binary.code_size);
+ }
+ sctx->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+ fprintf(stderr, "pc = %u\n", pc);
+
shader_va = r600_resource_va(ctx->screen, (void *)shader->bo);
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
@@ -296,14 +302,6 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
return;
}
- if (program->kernels) {
- FREE(program->kernels);
- }
-
- if (program->llvm_ctx){
- LLVMContextDispose(program->llvm_ctx);
- }
-
//And then free the program itself.
FREE(program);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index ae40f4e7413..6abd42b64d1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2274,45 +2274,41 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
}
}
-int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
- LLVMModuleRef mod)
+int si_create_shader(struct si_context *sctx, struct si_pipe_shader *shader,
+ const struct radeon_shader_binary *binary)
{
+
unsigned i;
- uint32_t *ptr;
- struct radeon_shader_binary binary;
- bool dump = r600_can_dump_shader(&sctx->screen->b,
- shader->selector ? shader->selector->tokens : NULL);
- memset(&binary, 0, sizeof(binary));
- radeon_llvm_compile(mod, &binary,
- r600_get_llvm_processor_name(sctx->screen->b.family), dump);
- if (dump && ! binary.disassembled) {
+ bool dump = false;
+
+ if (dump && ! binary->disassembled) {
fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary.code_size; i+=4 ) {
- fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
- binary.code[i + 2], binary.code[i + 1],
- binary.code[i]);
+ for (i = 0; i < binary->code_size; i+=4 ) {
+ fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+ binary->code[i + 2], binary->code[i + 1],
+ binary->code[i]);
}
}
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- for (i = 0; i < binary.config_size; i+= 8) {
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
- unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+ for (i = 0; i < binary->config_size; i+= 8) {
+ unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary->config + i));
+ unsigned value = util_le32_to_cpu(*(uint32_t*)(binary->config + i + 4));
switch (reg) {
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
- shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
- shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
+ shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
+ shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
- shader->lds_size = G_00B84C_LDS_SIZE(value);
+ shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
break;
case R_0286CC_SPI_PS_INPUT_ENA:
shader->spi_ps_input_ena = value;
@@ -2324,6 +2320,24 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
}
}
+
+ return 0;
+}
+
+int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
+ LLVMModuleRef mod)
+{
+ int i;
+ struct radeon_shader_binary binary;
+ bool dump = r600_can_dump_shader(&sctx->screen->b,
+ shader->selector ? shader->selector->tokens : NULL);
+ uint32_t *ptr;
+ memset(&binary, 0, sizeof(binary));
+ radeon_llvm_compile(mod, &binary,
+ r600_get_llvm_processor_name(sctx->screen->b.family), dump);
+
+ si_create_shader(sctx, shader, &binary);
+
/* copy new shader */
r600_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_IMMUTABLE,
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d667baf402d..fd2b57f9cff 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -31,6 +31,8 @@
#include <llvm-c/Core.h> /* LLVMModuleRef */
+struct radeon_shader_binary;
+
#define SI_SGPR_CONST 0
#define SI_SGPR_SAMPLER 2
#define SI_SGPR_RESOURCE 4
@@ -198,6 +200,8 @@ int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shade
int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
LLVMModuleRef mod);
+int si_create_shader(struct si_context *sctx, struct si_pipe_shader *shader,
+ const struct radeon_shader_binary *binary);
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
#endif
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 0608cf94827..671cc330210 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -41,6 +41,7 @@
#include <llvm/Support/SourceMgr.h>
#include <llvm/IRReader/IRReader.h>
#endif
+#include <llvm/ADT/OwningPtr.h>
#include <llvm/PassManager.h>
#include <llvm/Support/CodeGen.h>
#include <llvm/Support/TargetSelect.h>
@@ -48,6 +49,8 @@
#if HAVE_LLVM < 0x0303
#include <llvm/Support/PathV1.h>
#endif
+#include <llvm/Support/FormattedStream.h>
+#include <llvm/Support/TargetRegistry.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
@@ -58,6 +61,8 @@
#else
#include <llvm/IR/DataLayout.h>
#endif
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Target/TargetOptions.h>
#include "pipe/p_state.h"
#include "util/u_memory.h"
@@ -67,6 +72,8 @@
#include <fstream>
#include <cstdio>
#include <sstream>
+#include <libelf.h>
+#include <gelf.h>
using namespace clover;
@@ -297,17 +304,109 @@ namespace {
module
build_module_llvm(llvm::Module *mod,
const std::vector<llvm::Function *> &kernels,
- clang::LangAS::Map& address_spaces) {
+ clang::LangAS::Map& address_spaces,
+ std::string triple, std::string processor) {
+ std::string log;
+ const llvm::Target *target = llvm::TargetRegistry::lookupTarget(triple, log);
+ if (!target)
+ throw build_error(log);
+
+ llvm::TargetOptions options;
+ llvm::OwningPtr<llvm::TargetMachine> tm(target->createTargetMachine(triple, processor, "", options));
+ if (!tm.get())
+ throw build_error("Target not found: " + triple);
+
+ llvm::PassManager pm;
+ pm.add(new llvm::DataLayout(mod));
+ std::string object_file;
+ llvm::raw_string_ostream os(object_file);
+ llvm::formatted_raw_ostream fos(os);
+ tm->addPassesToEmitFile(pm, fos, llvm::TargetMachine::CGFT_ObjectFile);
+ pm.run(*mod);
+ os.flush();
+ fos.flush();
+
+ // One of the libelf implementations
+ // (http://www.mr511.de/software/english.htm) requires calling
+ // elf_version() before elf_memory().
+ //
+ elf_version(EV_CURRENT);
+ char *elf_buffer = (char*)MALLOC(object_file.size());
+ memcpy(elf_buffer, &object_file[0], object_file.size());
+ Elf *elf = elf_memory(elf_buffer, object_file.size());
+ size_t section_str_index;
+ elf_getshdrstrndx(elf, &section_str_index);
+ Elf_Scn *section = NULL;
+ Elf_Scn *symbol_table = NULL;
+ Elf_Scn *string_table = NULL;
+
+ // Find the symbol table
+ while ((section = elf_nextscn(elf, section))) {
+ const char *name;
+ GElf_Shdr section_header;
+ if (gelf_getshdr(section, &section_header) != &section_header) {
+ throw build_error("Failed to read ELF section header\n");
+ }
+ name = elf_strptr(elf, section_str_index, section_header.sh_name);
+ if (!strcmp(name, ".symtab")) {
+ symbol_table = section;
+ string_table = elf_getscn(elf, section_header.sh_link);
+ if (!string_table)
+ throw build_error(elf_errmsg(-1));
+ assert(string_table);
+ break;
+ }
+ }
+ if (!symbol_table) {
+ throw build_error("Unable to find symbol table.");
+ }
+
+ // Extract symbol information from the table
+
+ Elf_Data *symbol_table_data;
+ Elf_Data *string_table_data;
+ GElf_Sym *symbol;
+ GElf_Sym s;
+
+ symbol_table_data = elf_getdata(symbol_table, NULL);
+ string_table_data = elf_getdata(string_table, NULL);
+ if (!string_table_data)
+ throw build_error(elf_errmsg(-1));
+
+ assert(string_table_data);
+ char *names = (char*)MALLOC(string_table_data->d_size);
+ memcpy(names, string_table_data->d_buf, string_table_data->d_size);
+ int i = 0;
+ assert(symbol_table_data);
+
+ std::map<std::string, unsigned> kernel_offsets;
+
+ // Determine the offsets for each kernel
+ while ((symbol = gelf_getsym(symbol_table_data, i++, &s))) {
+ char *symbol_name_start = names + symbol->st_name;
+ for (std::vector<llvm::Function*>::const_iterator i = kernels.begin(),
+ e = kernels.end(); i != e; ++i) {
+ llvm::Function *f = *i;
+ if (f->getName() == std::string(symbol_name_start))
+ kernel_offsets[f->getName()] = symbol->st_value;
+ }
+ }
+ FREE(names);
+ // Begin building the clover module
module m;
struct pipe_llvm_program_header header;
- llvm::SmallVector<char, 1024> llvm_bitcode;
- llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode);
- llvm::BitstreamWriter writer(llvm_bitcode);
- llvm::WriteBitcodeToFile(mod, bitcode_ostream);
- bitcode_ostream.flush();
+ // Store the generated ELF binary in the module's text section.
+ header.num_bytes = object_file.size();
+ std::string data;
+ data.insert(0, (char*)(&header), sizeof(header));
+ data.insert(data.end(), object_file.begin(),
+ object_file.end());
+ m.secs.push_back(module::section(0, module::section::text,
+ header.num_bytes, data));
+ // Extract the argument information from the kernel
for (unsigned i = 0; i < kernels.size(); ++i) {
llvm::Function *kernel_func;
std::string kernel_name;
@@ -375,9 +474,10 @@ namespace {
}
}
- m.syms.push_back(module::symbol(kernel_name, 0, i, args ));
+ m.syms.push_back(module::symbol(kernel_name, 0, kernel_offsets[kernel_name], args ));
}
+#if 0
header.num_bytes = llvm_bitcode.size();
std::string data;
data.insert(0, (char*)(&header), sizeof(header));
@@ -385,6 +485,8 @@ namespace {
llvm_bitcode.end());
m.secs.push_back(module::section(0, module::section::text,
header.num_bytes, data));
+#endif
+
return m;
}
@@ -420,6 +522,6 @@ clover::compile_program_llvm(const compat::string &source,
assert(0);
return module();
default:
- return build_module_llvm(mod, kernels, address_spaces);
+ return build_module_llvm(mod, kernels, address_spaces, triple, processor);
}
}