summaryrefslogtreecommitdiff
path: root/src/core/cpu/builtins.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/cpu/builtins.cpp')
-rw-r--r--src/core/cpu/builtins.cpp211
1 files changed, 206 insertions, 5 deletions
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 6fdab30..64eb945 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -1,6 +1,205 @@
+#include "builtins.h"
#include "kernel.h"
+#include "../events.h"
+
+#include <sys/mman.h>
+#include <signal.h>
+
+#include <llvm/Function.h>
+#include <iostream>
+#include <cstring>
+
+using namespace Coal;
+
+/*
+ * TLS-related functions
+ */
__thread Coal::CPUKernelWorkGroup *g_work_group;
+__thread void *work_items_data;
+__thread size_t work_items_size;
+
+void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current)
+{
+ g_work_group = current;
+}
+
+void *getWorkItemsData(size_t &size)
+{
+ size = work_items_size;
+ return work_items_data;
+}
+
+void setWorkItemsData(void *ptr, size_t size)
+{
+ work_items_data = ptr;
+ work_items_size = size;
+}
+
+/*
+ * Actual built-ins implementations
+ */
+cl_uint CPUKernelWorkGroup::getWorkDim() const
+{
+ return p_work_dim;
+}
+
+size_t CPUKernelWorkGroup::getGlobalId(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 0;
+
+ return p_global_id_start_offset[dimindx] + p_current_context->local_id[dimindx];
+}
+
+size_t CPUKernelWorkGroup::getGlobalSize(cl_uint dimindx) const
+{
+ if (dimindx >p_work_dim)
+ return 1;
+
+ return p_event->global_work_size(dimindx);
+}
+
+size_t CPUKernelWorkGroup::getLocalSize(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 1;
+
+ return p_event->local_work_size(dimindx);
+}
+
+size_t CPUKernelWorkGroup::getLocalID(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 0;
+
+ return p_current_context->local_id[dimindx];
+}
+
+size_t CPUKernelWorkGroup::getNumGroups(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 1;
+
+ return (p_event->global_work_size(dimindx) /
+ p_event->local_work_size(dimindx));
+}
+
+size_t CPUKernelWorkGroup::getGroupID(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 0;
+
+ return p_index[dimindx];
+}
+
+size_t CPUKernelWorkGroup::getGlobalOffset(cl_uint dimindx) const
+{
+ if (dimindx > p_work_dim)
+ return 0;
+
+ return p_event->global_work_offset(dimindx);
+}
+
+void CPUKernelWorkGroup::barrier(unsigned int flags)
+{
+ p_had_barrier = true;
+
+ // Allocate or reuse TLS memory for the stacks (it isn't freed between
+ // the work groups, and even the kernels, so if we need less space than
+ // allocated, it's good)
+ if (!p_contexts)
+ {
+ if (p_current_work_item != 0)
+ {
+ // Completely abnormal, it means that not every work-items
+ // encounter the barrier
+ std::cerr << "*** Not every work-items of "
+ << p_kernel->function()->getNameStr()
+ << " calls barrier(); !" << std::endl;
+ return;
+ }
+
+ // Allocate or reuse the stacks
+ size_t contexts_size;
+ p_contexts = getWorkItemsData(contexts_size);
+ size_t needed_size = p_num_work_items * (p_stack_size + sizeof(Context));
+
+ if (!p_contexts || contexts_size < needed_size)
+ {
+ // We must allocate a new space
+ if (p_contexts)
+ munmap(p_contexts, contexts_size);
+
+ p_contexts = mmap(0, needed_size, PROT_EXEC | PROT_READ | PROT_WRITE, /* People say a stack must be executable */
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+ setWorkItemsData(p_contexts, contexts_size);
+ }
+
+ // Now that we have a real main context, initialize it
+ p_current_context = getContextAddr(0);
+ p_current_context->initialized = 1;
+ std::memset(p_current_context->local_id, 0, p_work_dim * sizeof(size_t));
+
+ getcontext(&p_current_context->context);
+ }
+
+ // Take the next context
+ p_current_work_item++;
+ if (p_current_work_item == p_num_work_items) p_current_work_item = 0;
+
+ Context *next = getContextAddr(p_current_work_item);
+ Context *main = getContextAddr(0); // The context not created with makecontext
+
+ // If the next context isn't initialized, initialize it.
+ // Note: mmap zeroes the memory, so next->initialized == 0 if it isn't initialized
+ if (next->initialized == 0)
+ {
+ next->initialized = 1;
+
+ // local-id of next is the one of the current context, but incVec'ed
+ std::memcpy(next->local_id, p_current_context->local_id,
+ MAX_WORK_DIMS * sizeof(size_t));
+
+ incVec(p_work_dim, next->local_id, p_max_local_id);
+
+ // Initialize the next context
+ if (getcontext(&next->context) != 0)
+ return;
+
+ // Get its stack. It is located a next + sizeof(Context)
+ char *stack = (char *)next;
+ stack += sizeof(Context);
+
+ next->context.uc_link = &main->context;
+ next->context.uc_stack.ss_sp = stack;
+ next->context.uc_stack.ss_size = p_stack_size;
+
+ // Tell it to run the kernel function
+ makecontext(&next->context, p_kernel_func_addr, 0);
+ }
+
+ // Switch to the next context
+ ucontext_t *cur = &p_current_context->context;
+ p_current_context = next;
+
+ swapcontext(cur, &next->context);
+
+ // When we return here, it means that all the other work items encountered
+ // a barrier and that we returned to this one. We can continue.
+}
+
+void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
+{
+ std::cout << "OpenCL: Non-existant builtin function " << name
+ << " found in kernel " << p_kernel->function()->getNameStr()
+ << '.' << std::endl;
+}
+
+/*
+ * Built-in functions
+ */
static size_t get_global_id(cl_uint dimindx)
{
@@ -42,14 +241,14 @@ static size_t get_global_offset(uint dimindx)
return g_work_group->getGlobalOffset(dimindx);
}
-/*
- * Utility functions
- */
-void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current)
+static void barrier(unsigned int flags)
{
- g_work_group = current;
+ g_work_group->barrier(flags);
}
+/*
+ * Bridge between LLVM and us
+ */
static void unimplemented_stub()
{
}
@@ -72,6 +271,8 @@ void *getBuiltin(const std::string &name)
return (void *)&get_group_id;
else if (name == "get_global_offset")
return (void *)&get_global_offset;
+ else if (name == "barrier")
+ return (void *)&barrier;
// Function not found
g_work_group->builtinNotFound(name);