summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuiling Song <ruiling.song@intel.com>2016-01-19 11:29:06 +0800
committerYang Rong <rong.r.yang@intel.com>2016-11-08 20:38:21 +0800
commit295b1cb6d1ac616dd4d9b4b2ef14934b86e3ffbf (patch)
tree362d23da596fa3f4b222fdadead7431136b4516a
parent07f879d60a62eb56f99d6e20f4bf363caf64a1ae (diff)
GBE: Implement new constant solution for ocl2
Different from ocl 1.2, which gather all constant into one surface. ocl2 only gather program global/constant into one surface. But keep other constant passed through kernel argument in separate buffer. Signed-off-by: Ruiling Song <ruiling.song@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--backend/src/CMakeLists.txt2
-rw-r--r--backend/src/backend/gen_insn_selection.cpp15
-rw-r--r--backend/src/backend/program.cpp51
-rw-r--r--backend/src/backend/program.h6
-rw-r--r--backend/src/backend/program.hpp4
-rw-r--r--backend/src/gbe_bin_interpreter.cpp3
-rw-r--r--backend/src/ir/profile.cpp4
-rw-r--r--backend/src/ir/profile.hpp5
-rw-r--r--backend/src/ir/reloc.cpp87
-rw-r--r--backend/src/ir/reloc.hpp90
-rw-r--r--backend/src/ir/unit.hpp8
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp87
-rw-r--r--kernels/compiler_program_global.cl68
-rw-r--r--src/cl_command_queue.c3
-rw-r--r--src/cl_command_queue_gen7.c20
-rw-r--r--src/cl_gbe_loader.cpp10
-rw-r--r--src/cl_gbe_loader.h2
-rw-r--r--src/cl_program.c51
-rw-r--r--src/cl_program.h2
-rw-r--r--utests/CMakeLists.txt1
-rw-r--r--utests/compiler_program_global.cpp80
21 files changed, 550 insertions, 49 deletions
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 41eb5ec6..6ff25e7c 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -73,6 +73,8 @@ set (GBE_SRC
ir/immediate.cpp
ir/structurizer.hpp
ir/structurizer.cpp
+ ir/reloc.hpp
+ ir/reloc.cpp
backend/context.cpp
backend/context.hpp
backend/program.cpp
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 5ed6b041..e7e3e413 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4014,6 +4014,13 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
LoadInstructionPattern(void) : SelectionPattern(1, 1) {
this->opcodes.push_back(ir::OP_LOAD);
}
+ bool isReadConstantLegacy(const ir::LoadInstruction &load) const {
+ ir::AddressMode AM = load.getAddressMode();
+ ir::AddressSpace AS = load.getAddressSpace();
+ if (AM != ir::AM_Stateless && AS == ir::MEM_CONSTANT)
+ return true;
+ return false;
+ }
void untypedReadStateless(Selection::Opaque &sel,
GenRegister addr,
vector<GenRegister> &dst
@@ -4096,7 +4103,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
unsigned SI = insn.getSurfaceIndex();
sel.UNTYPED_READ(addr, dst.data(), valueNum, GenRegister::immud(SI), btiTemp);
}
- } else if (addrSpace == ir::MEM_LOCAL || addrSpace == ir::MEM_CONSTANT ) {
+ } else if (addrSpace == ir::MEM_LOCAL || isReadConstantLegacy(insn) ) {
// stateless mode, local/constant still use bti access
unsigned bti = addrSpace == ir::MEM_CONSTANT ? BTI_CONSTANT : 0xfe;
GenRegister addrDW = addr;
@@ -4260,7 +4267,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
b = GenRegister::immud(insn.getSurfaceIndex());
}
read64Legacy(sel, addr, dst, b, btiTemp);
- } else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) {
+ } else if (addrSpace == MEM_LOCAL || isReadConstantLegacy(insn)) {
GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? 0xfe : BTI_CONSTANT);
GenRegister addrDW = addr;
if (addrBytes == 8)
@@ -4481,7 +4488,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
unsigned SI = insn.getSurfaceIndex();
sel.BYTE_GATHER(dst, addr, elemSize, GenRegister::immud(SI), btiTemp);
}
- } else if (addrSpace == ir::MEM_LOCAL || addrSpace == ir::MEM_CONSTANT) {
+ } else if (addrSpace == ir::MEM_LOCAL || isReadConstantLegacy(insn)) {
unsigned bti = addrSpace == ir::MEM_CONSTANT ? BTI_CONSTANT : 0xfe;
GenRegister addrDW = addr;
if (addrBytes == 8) {
@@ -4701,7 +4708,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
if (insn.isBlock())
this->emitOWordRead(sel, insn, address, addrSpace);
- else if (addrSpace == MEM_CONSTANT) {
+ else if (isReadConstantLegacy(insn)) {
// XXX TODO read 64bit constant through constant cache
// Per HW Spec, constant cache messages can read at least DWORD data.
// So, byte/short data type, we have to read through data cache.
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 6c66c06c..38a990b7 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -106,11 +106,14 @@ namespace gbe {
return it->offset; // we found it!
}
- Program::Program(uint32_t fast_relaxed_math) : fast_relaxed_math(fast_relaxed_math), constantSet(NULL) {}
+ Program::Program(uint32_t fast_relaxed_math) : fast_relaxed_math(fast_relaxed_math),
+ constantSet(NULL),
+ relocTable(NULL) {}
Program::~Program(void) {
for (map<std::string, Kernel*>::iterator it = kernels.begin(); it != kernels.end(); ++it)
GBE_DELETE(it->second);
if (constantSet) delete constantSet;
+ if (relocTable) delete relocTable;
}
#ifdef GBE_COMPILER_AVAILABLE
@@ -174,6 +177,7 @@ namespace gbe {
bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) {
constantSet = new ir::ConstantSet(unit.getConstantSet());
+ relocTable = new ir::RelocTable(unit.getRelocTable());
const auto &set = unit.getFunctionSet();
const uint32_t kernelNum = set.size();
if (OCL_OUTPUT_GEN_IR) std::cout << unit;
@@ -212,6 +216,7 @@ namespace gbe {
uint32_t ret_size = 0;
uint32_t ker_num = kernels.size();
uint32_t has_constset = 0;
+ uint32_t has_relocTable = 0;
OUT_UPDATE_SZ(magic_begin);
@@ -227,6 +232,18 @@ namespace gbe {
OUT_UPDATE_SZ(has_constset);
}
+ if(relocTable) {
+ has_relocTable = 1;
+ OUT_UPDATE_SZ(has_relocTable);
+ uint32_t sz = relocTable->serializeToBin(outs);
+ if (!sz)
+ return 0;
+
+ ret_size += sz;
+ } else {
+ OUT_UPDATE_SZ(has_relocTable);
+ }
+
OUT_UPDATE_SZ(ker_num);
for (map<std::string, Kernel*>::iterator it = kernels.begin(); it != kernels.end(); ++it) {
uint32_t sz = it->second->serializeToBin(outs);
@@ -247,6 +264,7 @@ namespace gbe {
int has_constset = 0;
uint32_t ker_num;
uint32_t magic;
+ uint32_t has_relocTable = 0;
IN_UPDATE_SZ(magic);
if (magic != magic_begin)
@@ -263,6 +281,17 @@ namespace gbe {
total_size += sz;
}
+ IN_UPDATE_SZ(has_relocTable);
+ if(has_relocTable) {
+ relocTable = new ir::RelocTable;
+ uint32_t sz = relocTable->deserializeFromBin(ins);
+
+ if (sz == 0)
+ return 0;
+
+ total_size += sz;
+ }
+
IN_UPDATE_SZ(ker_num);
for (uint32_t i = 0; i < ker_num; i++) {
@@ -303,6 +332,8 @@ namespace gbe {
outs.write(name.c_str(), name.size());
ret_size += sizeof(char)*name.size();
+ OUT_UPDATE_SZ(oclVersion);
+
OUT_UPDATE_SZ(argNum);
for (i = 0; i < argNum; i++) {
KernelArgument& arg = args[i];
@@ -415,6 +446,8 @@ namespace gbe {
name = c_name;
delete[] c_name;
+ IN_UPDATE_SZ(oclVersion);
+
IN_UPDATE_SZ(argNum);
args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, argNum);
for (uint32_t i = 0; i < argNum; i++) {
@@ -1164,6 +1197,18 @@ EXTEND_QUOTE:
program->getGlobalConstantData(mem);
}
+ static size_t programGetGlobalRelocCount(gbe_program gbeProgram) {
+ if (gbeProgram == NULL) return 0;
+ const gbe::Program *program = (const gbe::Program*) gbeProgram;
+ return program->getGlobalRelocCount();
+ }
+
+ static void programGetGlobalRelocTable(gbe_program gbeProgram, char *mem) {
+ if (gbeProgram == NULL) return;
+ const gbe::Program *program = (const gbe::Program*) gbeProgram;
+ program->getGlobalRelocTable(mem);
+ }
+
static uint32_t programGetKernelNum(gbe_program gbeProgram) {
if (gbeProgram == NULL) return 0;
const gbe::Program *program = (const gbe::Program*) gbeProgram;
@@ -1411,6 +1456,8 @@ GBE_EXPORT_SYMBOL gbe_program_link_from_llvm_cb *gbe_program_link_from_llvm = NU
GBE_EXPORT_SYMBOL gbe_program_build_from_llvm_cb *gbe_program_build_from_llvm = NULL;
GBE_EXPORT_SYMBOL gbe_program_get_global_constant_size_cb *gbe_program_get_global_constant_size = NULL;
GBE_EXPORT_SYMBOL gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data = NULL;
+GBE_EXPORT_SYMBOL gbe_program_get_global_reloc_count_cb *gbe_program_get_global_reloc_count = NULL;
+GBE_EXPORT_SYMBOL gbe_program_get_global_reloc_table_cb *gbe_program_get_global_reloc_table = NULL;
GBE_EXPORT_SYMBOL gbe_program_clean_llvm_resource_cb *gbe_program_clean_llvm_resource = NULL;
GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = NULL;
GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb *gbe_program_get_kernel_num = NULL;
@@ -1462,6 +1509,8 @@ namespace gbe
gbe_program_check_opt = gbe::programCheckOption;
gbe_program_get_global_constant_size = gbe::programGetGlobalConstantSize;
gbe_program_get_global_constant_data = gbe::programGetGlobalConstantData;
+ gbe_program_get_global_reloc_count = gbe::programGetGlobalRelocCount;
+ gbe_program_get_global_reloc_table = gbe::programGetGlobalRelocTable;
gbe_program_clean_llvm_resource = gbe::programCleanLlvmResource;
gbe_program_delete = gbe::programDelete;
gbe_program_get_kernel_num = gbe::programGetKernelNum;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 82f04bae..dadc1716 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -109,6 +109,7 @@ enum gbe_curbe_type {
GBE_CURBE_PROFILING_TIMESTAMP3,
GBE_CURBE_PROFILING_TIMESTAMP4,
GBE_CURBE_THREAD_ID,
+ GBE_CURBE_CONSTANT_ADDRSPACE,
GBE_GEN_REG,
};
@@ -257,6 +258,11 @@ extern gbe_program_get_global_constant_size_cb *gbe_program_get_global_constant_
typedef void (gbe_program_get_global_constant_data_cb)(gbe_program gbeProgram, char *mem);
extern gbe_program_get_global_constant_data_cb *gbe_program_get_global_constant_data;
+typedef size_t (gbe_program_get_global_reloc_count_cb)(gbe_program gbeProgram);
+extern gbe_program_get_global_reloc_count_cb *gbe_program_get_global_reloc_count;
+
+typedef void (gbe_program_get_global_reloc_table_cb)(gbe_program gbeProgram, char *mem);
+extern gbe_program_get_global_reloc_table_cb *gbe_program_get_global_reloc_table;
/*! Get the size of defined samplers */
typedef size_t (gbe_kernel_get_sampler_size_cb)(gbe_kernel gbeKernel);
extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 184e4fd1..93e15c38 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -300,6 +300,8 @@ namespace gbe {
/*! Get the content of global constant arrays */
void getGlobalConstantData(char *mem) const { constantSet->getData(mem); }
+ uint32_t getGlobalRelocCount(void) const { return relocTable->getCount(); }
+ void getGlobalRelocTable(char *p) const { relocTable->getData(p); }
static const uint32_t magic_begin = TO_MAGIC('P', 'R', 'O', 'G');
static const uint32_t magic_end = TO_MAGIC('G', 'O', 'R', 'P');
@@ -331,6 +333,8 @@ namespace gbe {
map<std::string, Kernel*> kernels;
/*! Global (constants) outside any kernel */
ir::ConstantSet *constantSet;
+ /*! relocation table */
+ ir::RelocTable *relocTable;
/*! Use custom allocators */
GBE_CLASS(Program);
};
diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index 6c7380d5..dd3ce016 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -23,6 +23,7 @@
#include "ir/constant.cpp"
#include "ir/printf.cpp"
#include "ir/profiling.cpp"
+#include "ir/reloc.cpp"
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
@@ -62,6 +63,8 @@ struct BinInterpCallBackInitializer
gbe_program_get_global_constant_size = gbe::programGetGlobalConstantSize;
gbe_program_delete = gbe::programDelete;
gbe_program_get_global_constant_data = gbe::programGetGlobalConstantData;
+ gbe_program_get_global_reloc_count = gbe::programGetGlobalRelocCount;
+ gbe_program_get_global_reloc_table = gbe::programGetGlobalRelocTable;
gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData;
gbe_kernel_get_image_data = gbe::kernelGetImageData;
gbe_kernel_get_ocl_version = gbe::kernelGetOclVersion;
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index e20935de..b7898037 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -47,7 +47,8 @@ namespace ir {
"profiling_timestamps0", "profiling_timestamps1",
"profiling_timestamps2", "profiling_timestamps3",
"profiling_timestamps4",
- "threadid"
+ "threadid",
+ "constant_addrspace_start"
};
#if GBE_DEBUG
@@ -99,6 +100,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, profilingts3, 0, GBE_CURBE_PROFILING_TIMESTAMP3);
DECL_NEW_REG(FAMILY_DWORD, profilingts4, 0, GBE_CURBE_PROFILING_TIMESTAMP4);
DECL_NEW_REG(FAMILY_DWORD, threadid, 1, GBE_CURBE_THREAD_ID);
+ DECL_NEW_REG(FAMILY_QWORD, constant_addrspace, 1, GBE_CURBE_CONSTANT_ADDRSPACE);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index eab78921..ba3d4fce 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -68,7 +68,7 @@ namespace ir {
static const Register zero = Register(24); // scalar register holds zero.
static const Register one = Register(25); // scalar register holds one.
static const Register retVal = Register(26); // helper register to do data flow analysis.
- static const Register dwblockip = Register(27); // blockip
+ static const Register dwblockip = Register(27); // blockip
static const Register profilingbptr = Register(28); // buffer addr for profiling.
static const Register profilingts0 = Register(29); // timestamp for profiling.
static const Register profilingts1 = Register(30); // timestamp for profiling.
@@ -76,7 +76,8 @@ namespace ir {
static const Register profilingts3 = Register(32); // timestamp for profiling.
static const Register profilingts4 = Register(33); // timestamp for profiling.
static const Register threadid = Register(34); // the thread id of this thread.
- static const uint32_t regNum = 35; // number of special registers
+ static const Register constant_addrspace = Register(35); // starting address of program-scope constant
+ static const uint32_t regNum = 36; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/backend/src/ir/reloc.cpp b/backend/src/ir/reloc.cpp
new file mode 100644
index 00000000..48846108
--- /dev/null
+++ b/backend/src/ir/reloc.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file constant.hpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+#include "reloc.hpp"
+
+namespace gbe {
+namespace ir {
+
+#define OUT_UPDATE_SZ(elt) SERIALIZE_OUT(elt, outs, ret_size)
+#define IN_UPDATE_SZ(elt) DESERIALIZE_IN(elt, ins, total_size)
+
+ /*! Implements the serialization. */
+ uint32_t RelocTable::serializeToBin(std::ostream& outs) {
+ uint32_t ret_size = 0;
+ uint32_t sz = 0;
+
+ OUT_UPDATE_SZ(magic_begin);
+
+ sz = getCount();
+ OUT_UPDATE_SZ(sz);
+ RelocEntry entry(0, 0);
+ for (uint32_t i = 0; i < sz; ++i) {
+ entry = entries[i];
+ OUT_UPDATE_SZ(entry.refOffset);
+ OUT_UPDATE_SZ(entry.defOffset);
+ }
+
+ OUT_UPDATE_SZ(magic_end);
+ OUT_UPDATE_SZ(ret_size);
+
+ return ret_size;
+ }
+
+ uint32_t RelocTable::deserializeFromBin(std::istream& ins) {
+ uint32_t total_size = 0;
+ uint32_t magic;
+ uint32_t refOffset;
+ uint32_t defOffset;
+ uint32_t sz = 0;
+
+ IN_UPDATE_SZ(magic);
+ if (magic != magic_begin)
+ return 0;
+
+ IN_UPDATE_SZ(sz); //regMap
+ for (uint32_t i = 0; i < sz; i++) {
+ IN_UPDATE_SZ(refOffset);
+ IN_UPDATE_SZ(defOffset);
+ addEntry(refOffset, defOffset);
+ }
+
+ IN_UPDATE_SZ(magic);
+ if (magic != magic_end)
+ return 0;
+
+ uint32_t total_bytes;
+ IN_UPDATE_SZ(total_bytes);
+ if (total_bytes + sizeof(total_size) != total_size)
+ return 0;
+
+ return total_size;
+ }
+
+} /* namespace ir */
+} /* namespace gbe */
+
diff --git a/backend/src/ir/reloc.hpp b/backend/src/ir/reloc.hpp
new file mode 100644
index 00000000..de33a8a1
--- /dev/null
+++ b/backend/src/ir/reloc.hpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file reloc.cpp
+ *
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+#ifndef __GBE_IR_RELOC_HPP__
+#define __GBE_IR_RELOC_HPP__
+
+#include "sys/vector.hpp"
+#include <string.h>
+
+namespace gbe {
+namespace ir {
+
+
+ /*! Complete unit of compilation. It contains a set of functions and a set of
+ * RelocEntry the functions may refer to.
+ */
+ struct RelocEntry {
+ RelocEntry(unsigned int rO, unsigned int dO):
+ refOffset(rO),
+ defOffset(dO) {}
+
+ unsigned int refOffset;
+ unsigned int defOffset;
+ };
+
+ class RelocTable : public NonCopyable, public Serializable
+ {
+ public:
+ void addEntry(unsigned refOffset, unsigned defOffset) {
+ entries.push_back(RelocEntry(refOffset, defOffset));
+ }
+ RelocTable() : Serializable() {}
+ RelocTable(const RelocTable& other) : Serializable(other),
+ entries(other.entries) {}
+ uint32_t getCount() { return entries.size(); }
+ void getData(char *p) {
+ if (entries.size() > 0 && p)
+ memcpy(p, entries.data(), entries.size()*sizeof(RelocEntry));
+ }
+ static const uint32_t magic_begin = TO_MAGIC('R', 'E', 'L', 'C');
+ static const uint32_t magic_end = TO_MAGIC('C', 'L', 'E', 'R');
+
+ /* format:
+ magic_begin |
+ reloc_table_size |
+ entry_0_refOffset |
+ entry_0_defOffset |
+ entry_1_refOffset |
+ entry_1_defOffset |
+ ........ |
+ entry_n_refOffset |
+ entry_n_defOffset |
+ magic_end |
+ total_size
+ */
+
+ /*! Implements the serialization. */
+ virtual uint32_t serializeToBin(std::ostream& outs);
+ virtual uint32_t deserializeFromBin(std::istream& ins);
+ private:
+ vector<RelocEntry> entries;
+ GBE_CLASS(RelocTable);
+ };
+
+} /* namespace ir */
+} /* namespace gbe */
+
+#endif /* __GBE_IR_RELOC_HPP__ */
+
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index 01d85055..4ad7592c 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -28,7 +28,9 @@
#include "ir/register.hpp"
#include "ir/profiling.hpp"
#include "ir/printf.hpp"
+#include "ir/reloc.hpp"
#include "sys/map.hpp"
+#include <string.h>
#include "llvm/IR/Instructions.h"
@@ -39,9 +41,6 @@ namespace ir {
class Function;
class ProfilingInfo;
- /*! Complete unit of compilation. It contains a set of functions and a set of
- * constant the functions may refer to.
- */
class Unit : public NonCopyable
{
public:
@@ -77,6 +76,8 @@ namespace ir {
}
/*! Return the constant set */
ConstantSet& getConstantSet(void) { return constantSet; }
+ const RelocTable& getRelocTable(void) const { return relocTable; }
+ RelocTable& getRelocTable(void) { return relocTable; }
/*! Return the constant set */
const ConstantSet& getConstantSet(void) const { return constantSet; }
/*! Get profiling info in this function */
@@ -93,6 +94,7 @@ namespace ir {
friend class ContextInterface; //!< Can free modify the unit
FunctionSet functions; //!< All the defined functions
ConstantSet constantSet; //!< All the constants defined in the unit
+ RelocTable relocTable;
PointerSize pointerSize; //!< Size shared by all pointers
ProfilingInfo *profilingInfo; //!< profilingInfo store the information for profiling.
GBE_CLASS(Unit);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 941afc86..5b080ad2 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -554,7 +554,7 @@ namespace gbe
virtual bool doInitialization(Module &M);
/*! helper function for parsing global constant data */
- void getConstantData(const Constant * c, void* mem, uint32_t& offset) const;
+ void getConstantData(const Constant * c, void* mem, uint32_t& offset, vector<ir::RelocEntry> &) const;
void collectGlobalConstant(void) const;
ir::ImmediateIndex processConstantImmIndex(Constant *CPV, int32_t index = 0u);
const ir::Immediate &processConstantImm(Constant *CPV, int32_t index = 0u);
@@ -1189,8 +1189,13 @@ namespace gbe
break;
}
case 2:
- new_bti = BTI_CONSTANT;
-
+ // ocl 2.0, constant pointer use separate bti
+ if(legacyMode)
+ new_bti = BTI_CONSTANT;//btiBase;
+ else {
+ new_bti = btiBase;//btiBase;
+ incBtiBase();
+ }
break;
case 3:
new_bti = BTI_LOCAL;
@@ -1446,22 +1451,31 @@ namespace gbe
return;
}
- void GenWriter::getConstantData(const Constant * c, void* mem, uint32_t& offset) const {
+ void GenWriter::getConstantData(const Constant * c, void* mem, uint32_t& offset, vector<ir::RelocEntry> &relocs) const {
Type * type = c->getType();
Type::TypeID id = type->getTypeID();
GBE_ASSERT(c);
+ if (isa<GlobalVariable>(c)) {
+ ir::Constant cc = unit.getConstantSet().getConstant(c->getName());
+ unsigned int defOffset = cc.getOffset();
+
+ relocs.push_back(ir::RelocEntry(offset, defOffset));
+ uint32_t size = getTypeByteSize(unit, type);
+ memset((char*)mem+offset, 0, size);
+ offset += size;
+ return;
+ }
if(isa<UndefValue>(c)) {
uint32_t size = getTypeByteSize(unit, type);
offset += size;
return;
- } else if(isa<ConstantAggregateZero>(c)) {
+ } else if(isa<ConstantAggregateZero>(c) || isa<ConstantPointerNull>(c)) {
uint32_t size = getTypeByteSize(unit, type);
memset((char*)mem+offset, 0, size);
offset += size;
return;
}
-
switch(id) {
case Type::TypeID::StructTyID:
{
@@ -1479,7 +1493,7 @@ namespace gbe
offset += padding/8;
const Constant* sub = cast<Constant>(c->getOperand(op));
GBE_ASSERT(sub);
- getConstantData(sub, mem, offset);
+ getConstantData(sub, mem, offset, relocs);
}
break;
}
@@ -1500,7 +1514,7 @@ namespace gbe
uint32_t ops = c->getNumOperands();
for(uint32_t op = 0; op < ops; ++op) {
Constant * ca = dyn_cast<Constant>(c->getOperand(op));
- getConstantData(ca, mem, offset);
+ getConstantData(ca, mem, offset, relocs);
offset += padding;
}
}
@@ -1548,21 +1562,36 @@ namespace gbe
const Module::GlobalListType &globalList = TheModule->getGlobalList();
for(auto i = globalList.begin(); i != globalList.end(); i ++) {
const GlobalVariable &v = *i;
- if(!v.isConstantUsed()) continue;
const char *name = v.getName().data();
ir::AddressSpace addrSpace = addressSpaceLLVMToGen(v.getType()->getAddressSpace());
- if(addrSpace == ir::AddressSpace::MEM_CONSTANT || v.isConstant()) {
- GBE_ASSERT(v.hasInitializer());
- const Constant *c = v.getInitializer();
- Type * type = c->getType();
+
+ vector<ir::RelocEntry> relocs;
+ if(addrSpace == ir::MEM_CONSTANT /* __constant */
+ || addrSpace == ir::MEM_GLOBAL
+ || addrSpace == ir::MEM_PRIVATE) {
+ Type * type = v.getType()->getPointerElementType();
uint32_t size = getTypeByteSize(unit, type);
void* mem = malloc(size);
uint32_t offset = 0;
- getConstantData(c, mem, offset);
+ if (v.hasInitializer()) {
+ const Constant *c = v.getInitializer();
+ getConstantData(c, mem, offset, relocs);
+ } else {
+ memset(mem, 0, size);
+ }
uint32_t alignment = getAlignmentByte(unit, type);
unit.newConstant((char *)mem, name, size, alignment);
free(mem);
+ if (!legacyMode) {
+ uint32_t refOffset = unit.getConstantSet().getConstant(name).getOffset();
+ for (uint32_t k = 0; k < relocs.size(); k++) {
+ unit.getRelocTable().addEntry(
+ refOffset + relocs[k].refOffset,
+ relocs[k].defOffset
+ );
+ }
+ }
}
}
}
@@ -2819,30 +2848,20 @@ namespace gbe
this->newRegister(const_cast<GlobalVariable*>(&v));
ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(oldSlm + padding/8, getType(ctx, v.getType())));
- } else if(addrSpace == ir::MEM_CONSTANT || v.isConstant()) {
- GBE_ASSERT(v.hasInitializer());
- this->newRegister(const_cast<GlobalVariable*>(&v));
- ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
- ir::Constant &con = unit.getConstantSet().getConstant(v.getName());
- ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType())));
- } else {
+ } else if(addrSpace == ir::MEM_CONSTANT
+ || addrSpace == ir::MEM_GLOBAL
+ || v.isConstant()) {
if(v.getName().equals(StringRef("__gen_ocl_profiling_buf"))) {
ctx.getUnit().getProfilingInfo()->setBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
regTranslator.newScalarProxy(ir::ocl::profilingbptr, const_cast<GlobalVariable*>(&v));
- } else if(v.getName().str().substr(0, 4) == ".str") {
- /* When there are multi printf statements in multi kernel fucntions within the same
- translate unit, if they have the same sting parameter, such as
- kernel_func1 () {
- printf("Line is %d\n", line_num1);
- }
- kernel_func2 () {
- printf("Line is %d\n", line_num2);
- }
- The Clang will just generate one global string named .strXXX to represent "Line is %d\n"
- So when translating the kernel_func1, we can not unref that global var, so we will
- get here. Just ignore it to avoid assert. */
} else {
- GBE_ASSERT(0 && "Unsupported private memory access pattern");
+ this->newRegister(const_cast<GlobalVariable*>(&v));
+ ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
+ ir::Constant &con = unit.getConstantSet().getConstant(v.getName());
+ ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType())));
+ if (!legacyMode) {
+ ctx.ADD(getType(ctx, v.getType()), reg, ir::ocl::constant_addrspace, reg);
+ }
}
}
}
diff --git a/kernels/compiler_program_global.cl b/kernels/compiler_program_global.cl
new file mode 100644
index 00000000..405c53f5
--- /dev/null
+++ b/kernels/compiler_program_global.cl
@@ -0,0 +1,68 @@
+struct config{
+ int s0;
+ global short *s1;
+};
+
+global int i = 5;
+global int bb = 4;
+global int *global p;
+
+/* array */
+global int ba[12];
+
+/* short/long data type */
+global short s;
+global short s2;
+global long l;
+
+/* pointer in constant AS to global */
+global int * constant px =&i;
+
+/* constant pointer relocation */
+constant int x = 2;
+constant int y =1;
+constant int *constant z[2] = {&x, &y};
+
+/* structure with pointer field */
+global struct config c[2] = {{1, &s}, {2, &s2} };
+
+
+global int a = 1;
+global int b = 2;
+global int * constant gArr[2]= {&a, &b};
+
+__kernel void compiler_program_global0(const global int *src, int dynamic) {
+ size_t gid = get_global_id(0);
+ /* global read/write */
+ p = &i;
+ *p += 1;
+
+ /* pointer in struct memory access */
+ *c[gid&1].s1 += 2;
+
+ s = 2;
+ l = 3;
+
+ /* constant AS pointer (points to global) memory access */
+ *px += *z[dynamic];
+
+ p = &bb;
+ /* array */
+ if (gid < 11)
+ ba[gid] = src[gid];
+}
+
+__kernel void compiler_program_global1(global int *dst, int dynamic) {
+ size_t gid = get_global_id(0);
+// static global sg;
+
+ dst[11] = i;
+ dst[12] = *p;
+ dst[13] = s;
+ dst[14] = l;
+ dst[15] = *gArr[dynamic];
+
+ if (gid < 11)
+ dst[gid] = ba[gid];
+}
+
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 7e7a8542..00442bf0 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -150,11 +150,12 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgp
{
/* Bind all user buffers (given by clSetKernelArg) */
uint32_t i;
+ uint32_t ocl_version = interp_kernel_get_ocl_version(k->opaque);
enum gbe_arg_type arg_type; /* kind of argument */
for (i = 0; i < k->arg_n; ++i) {
int32_t offset; // location of the address in the curbe
arg_type = interp_kernel_get_arg_type(k->opaque, i);
- if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem)
+ if (!(arg_type == GBE_ARG_GLOBAL_PTR || (arg_type == GBE_ARG_CONSTANT_PTR && ocl_version >= 200)) || !k->args[i].mem)
continue;
offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
if (offset < 0)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index cc835561..fa28b74c 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -30,6 +30,7 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#define MAX_GROUP_SIZE_IN_HALFSLICE 512
static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; }
@@ -126,9 +127,22 @@ error:
static int
cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker, cl_gpgpu gpgpu)
{
- /* calculate constant buffer size
- * we need raw_size & aligned_size
- */
+ if (interp_kernel_get_ocl_version(ker->opaque) >= 200) {
+ // pass the starting of constant address space
+ int32_t constant_addrspace = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_CONSTANT_ADDRSPACE, 0);
+ if (constant_addrspace >= 0) {
+ size_t global_const_size = interp_program_get_global_constant_size(ker->program->opaque);
+ if (global_const_size > 0) {
+ *(uint64_t*)(ker->curbe + constant_addrspace) = (uint64_t)ker->program->global_data_ptr;
+ cl_gpgpu_bind_buf(gpgpu, ker->program->global_data, constant_addrspace, 0, ALIGN(global_const_size, getpagesize()), BTI_CONSTANT);
+ }
+ }
+ return 0;
+ }
+ // TODO this is only valid for OpenCL 1.2,
+ // under ocl1.2 we gather all constant into one dedicated surface.
+ // but in 2.0 we put program global into one surface, but constants
+ // pass through kernel argument in each separate buffer
int32_t arg;
size_t offset = 0;
uint32_t raw_size = 0, aligned_size =0;
diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp
index 7555d3fd..3736c86a 100644
--- a/src/cl_gbe_loader.cpp
+++ b/src/cl_gbe_loader.cpp
@@ -38,6 +38,8 @@ gbe_program_clean_llvm_resource_cb *compiler_program_clean_llvm_resource = NULL;
gbe_program_new_from_binary_cb *interp_program_new_from_binary = NULL;
gbe_program_get_global_constant_size_cb *interp_program_get_global_constant_size = NULL;
gbe_program_get_global_constant_data_cb *interp_program_get_global_constant_data = NULL;
+gbe_program_get_global_reloc_count_cb *interp_program_get_global_reloc_count = NULL;
+gbe_program_get_global_reloc_table_cb *interp_program_get_global_reloc_table = NULL;
gbe_program_delete_cb *interp_program_delete = NULL;
gbe_program_get_kernel_num_cb *interp_program_get_kernel_num = NULL;
gbe_program_get_kernel_by_name_cb *interp_program_get_kernel_by_name = NULL;
@@ -111,6 +113,14 @@ struct GbeLoaderInitializer
if (interp_program_get_global_constant_data == NULL)
return false;
+ interp_program_get_global_reloc_count = *(gbe_program_get_global_reloc_count_cb**)dlsym(dlhInterp, "gbe_program_get_global_reloc_count");
+ if (interp_program_get_global_reloc_count == NULL)
+ return false;
+
+ interp_program_get_global_reloc_table = *(gbe_program_get_global_reloc_table_cb**)dlsym(dlhInterp, "gbe_program_get_global_reloc_table");
+ if (interp_program_get_global_reloc_table == NULL)
+ return false;
+
interp_program_delete = *(gbe_program_delete_cb**)dlsym(dlhInterp, "gbe_program_delete");
if (interp_program_delete == NULL)
return false;
diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h
index 9d129e98..fde56ce4 100644
--- a/src/cl_gbe_loader.h
+++ b/src/cl_gbe_loader.h
@@ -38,6 +38,8 @@ extern gbe_program_clean_llvm_resource_cb *compiler_program_clean_llvm_resource;
extern gbe_program_new_from_binary_cb *interp_program_new_from_binary;
extern gbe_program_get_global_constant_size_cb *interp_program_get_global_constant_size;
extern gbe_program_get_global_constant_data_cb *interp_program_get_global_constant_data;
+extern gbe_program_get_global_reloc_count_cb *interp_program_get_global_reloc_count;
+extern gbe_program_get_global_reloc_table_cb *interp_program_get_global_reloc_table;
extern gbe_program_delete_cb *interp_program_delete;
extern gbe_program_get_kernel_num_cb *interp_program_get_kernel_num;
extern gbe_program_get_kernel_by_name_cb *interp_program_get_kernel_by_name;
diff --git a/src/cl_program.c b/src/cl_program.c
index 6e86675a..32f1deba 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -95,8 +95,13 @@ cl_program_delete(cl_program p)
cl_free(p->ker);
}
+ if (p->global_data_ptr)
+ cl_buffer_unreference(p->global_data);
+ cl_free(p->global_data_ptr);
+
/* Remove it from the list */
cl_context_remove_program(p->ctx, p);
+
/* Free the program as allocated by the compiler */
if (p->opaque) {
if (CompilerSupported())
@@ -203,6 +208,45 @@ LOCAL cl_bool headerCompare(const unsigned char *BufPtr, BINARY_HEADER_INDEX ind
#define isGenBinary(BufPtr) headerCompare(BufPtr, BHI_GEN_BINARY)
#define isCMRT(BufPtr) headerCompare(BufPtr, BHI_CMRT)
+static cl_int get_program_global_data(cl_program prog) {
+//OpenCL 1.2 would never call this function, and OpenCL 2.0 alwasy HAS_BO_SET_SOFTPIN.
+#ifdef HAS_BO_SET_SOFTPIN
+ cl_buffer_mgr bufmgr = NULL;
+ bufmgr = cl_context_get_bufmgr(prog->ctx);
+ assert(bufmgr);
+ size_t const_size = interp_program_get_global_constant_size(prog->opaque);
+ if (const_size == 0) return CL_SUCCESS;
+
+ int page_size = getpagesize();
+ size_t alignedSz = ALIGN(const_size, page_size);
+ char * p = (char*)cl_aligned_malloc(alignedSz, page_size);
+ prog->global_data_ptr = p;
+ interp_program_get_global_constant_data(prog->opaque, (char*)p);
+
+ prog->global_data = cl_buffer_alloc_userptr(bufmgr, "program global data", p, alignedSz, 0);
+ cl_buffer_set_softpin_offset(prog->global_data, (size_t)p);
+
+ uint32_t reloc_count = interp_program_get_global_reloc_count(prog->opaque);
+ if (reloc_count > 0) {
+ uint32_t x;
+ struct RelocEntry {int refOffset; int defOffset;};
+ char *temp = (char*) malloc(reloc_count *sizeof(int)*2);
+ interp_program_get_global_reloc_table(prog->opaque, temp);
+ for (x = 0; x < reloc_count; x++) {
+ int ref_offset = ((struct RelocEntry *)temp)[x].refOffset;
+ *(uint64_t*)&(p[ref_offset]) = ((struct RelocEntry *)temp)[x].defOffset + (uint64_t)p;
+ }
+ free(temp);
+ }
+#if 0
+ int x = 0;
+ for (x = 0; x < const_size; x++) {
+ printf("offset %d data: %x\n", x, (unsigned)p[x]);
+ }
+#endif
+#endif
+ return CL_SUCCESS;
+}
LOCAL cl_program
cl_program_create_from_binary(cl_context ctx,
cl_uint num_devices,
@@ -624,6 +668,9 @@ cl_program_build(cl_program p, const char *options)
memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz);
copyed += sz;
}
+ if ((err = get_program_global_data(p)) != CL_SUCCESS)
+ goto error;
+
p->is_built = 1;
p->build_status = CL_BUILD_SUCCESS;
return CL_SUCCESS;
@@ -729,6 +776,10 @@ cl_program_link(cl_context context,
memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz);
copyed += sz;
}
+
+ if ((err = get_program_global_data(p)) != CL_SUCCESS)
+ goto error;
+
done:
if(p) p->is_built = 1;
if(p) p->build_status = CL_BUILD_SUCCESS;
diff --git a/src/cl_program.h b/src/cl_program.h
index cf9615c9..2d7197ed 100644
--- a/src/cl_program.h
+++ b/src/cl_program.h
@@ -55,6 +55,8 @@ struct _cl_program {
cl_kernel *ker; /* All kernels included by the OCL file */
cl_program prev, next; /* We chain the programs together */
cl_context ctx; /* Its parent context */
+ cl_buffer global_data;
+ char * global_data_ptr;
char *bin; /* The program copied verbatim */
size_t bin_sz; /* Its size in memory */
char *source; /* Program sources */
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 7078a1fa..56add847 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -299,6 +299,7 @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
if (ENABLE_OPENCL_20)
SET(utests_sources
${utests_sources}
+ compiler_program_global.cpp
compiler_generic_pointer.cpp)
endif (ENABLE_OPENCL_20)
diff --git a/utests/compiler_program_global.cpp b/utests/compiler_program_global.cpp
new file mode 100644
index 00000000..ef7c655b
--- /dev/null
+++ b/utests/compiler_program_global.cpp
@@ -0,0 +1,80 @@
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+
+static int init_program(const char* name, cl_context ctx, cl_program *pg )
+{
+ cl_int err;
+ char* ker_path = cl_do_kiss_path(name, device);
+
+ cl_file_map_t *fm = cl_file_map_new();
+ err = cl_file_map_open(fm, ker_path);
+ if(err != CL_FILE_MAP_SUCCESS)
+ OCL_ASSERT(0);
+ const char *src = cl_file_map_begin(fm);
+
+ *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err);
+ free(ker_path);
+ cl_file_map_delete(fm);
+ return 0;
+
+}
+
+void compiler_program_global()
+{
+ const int n = 16;
+ int cpu_src[16];
+ cl_int err;
+
+ // Setup kernel and buffers
+ cl_program program;
+ init_program("compiler_program_global.cl", ctx, &program);
+ OCL_CALL (clBuildProgram, program, 1, &device, "-cl-std=CL2.0", NULL, NULL);
+
+ cl_kernel k0 = clCreateKernel(program, "compiler_program_global0", &err);
+ assert(err == CL_SUCCESS);
+ cl_kernel k1 = clCreateKernel(program, "compiler_program_global1", &err);
+ assert(err == CL_SUCCESS);
+
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
+
+ OCL_CALL (clSetKernelArg, k0, 0, sizeof(cl_mem), &buf[0]);
+ OCL_CALL (clSetKernelArg, k1, 0, sizeof(cl_mem), &buf[1]);
+
+ int dynamic = 1;
+ OCL_CALL (clSetKernelArg, k0, 1, sizeof(cl_int), &dynamic);
+ OCL_CALL (clSetKernelArg, k1, 1, sizeof(cl_int), &dynamic);
+
+ globals[0] = 16;
+ locals[0] = 16;
+
+ OCL_MAP_BUFFER(0);
+ for (int i = 0; i < n; ++i)
+ cpu_src[i] = ((int*)buf_data[0])[i] = i;
+ OCL_UNMAP_BUFFER(0);
+
+ // Run the kernel on GPU
+ OCL_CALL (clEnqueueNDRangeKernel, queue, k0, 1, NULL, globals, locals, 0, NULL, NULL);
+ OCL_CALL (clEnqueueNDRangeKernel, queue, k1, 1, NULL, globals, locals, 0, NULL, NULL);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < n; ++i) {
+// printf("i=%d dst=%d\n", i, ((int*)buf_data[1])[i]);
+ switch(i) {
+ default: OCL_ASSERT(((int*)buf_data[1])[i] == i); break;
+ case 11: OCL_ASSERT(((int*)buf_data[1])[i] == 7); break;
+ case 12: OCL_ASSERT(((int*)buf_data[1])[i] == 4); break;
+ case 13: OCL_ASSERT(((int*)buf_data[1])[i] == 2); break;
+ case 14: OCL_ASSERT(((int*)buf_data[1])[i] == 3); break;
+ case 15: OCL_ASSERT(((int*)buf_data[1])[i] == 2); break;
+ }
+ }
+ OCL_UNMAP_BUFFER(1);
+ clReleaseKernel(k0);
+ clReleaseKernel(k1);
+ clReleaseProgram(program);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_program_global);
+