// Source: root/src/core/cpu/kernel.h (blob 14607bfb18a689180677dbfed555c950a7e0ecce)
#ifndef __CPU_KERNEL_H__
#define __CPU_KERNEL_H__

#include "../deviceinterface.h"
#include "config.h"

#include <llvm/ExecutionEngine/GenericValue.h>
#include <vector>
#include <string>

#include <ucontext.h>
#include <pthread.h>

// Forward declaration: this header only handles llvm::Function through
// pointers, so the full LLVM class definition is not required here.
namespace llvm
{
    class Function;
}

namespace Coal
{

// Forward declarations: these types appear in this header only as pointer
// types, so their full definitions are not needed to parse it.
class CPUDevice;
class Kernel;
class KernelEvent;
class Image2D;

/**
 * \brief DeviceKernel subclass associating a Kernel with a CPUDevice and with
 *        the llvm::Function passed at construction.
 *
 * The object embeds a pthread_mutex_t. POSIX leaves the result of operating on
 * a copy of a mutex undefined, so instances are made non-copyable below.
 */
class CPUKernel : public DeviceKernel
{
    public:
        /**
         * \param device   CPU device this kernel object is attached to
         * \param kernel   front-end Kernel this object is attached to
         * \param function LLVM function handled by this kernel object
         */
        CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
        ~CPUKernel();

        // Size/limit queries.
        // NOTE(review): presumably these back the kernel work-group info
        // queries of the API — confirm against the implementation file.
        size_t workGroupSize() const;
        cl_ulong localMemSize() const;
        cl_ulong privateMemSize() const;
        size_t preferredWorkGroupSizeMultiple() const;

        /**
         * \param num_dims         number of work dimensions
         * \param dim              index of the dimension being considered
         * \param global_work_size global work size for that dimension
         * \return a work-group size for dimension \p dim
         *         (NOTE(review): the heuristic used is not visible from this
         *         header — see the implementation)
         */
        size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
                                  size_t global_work_size) const;

        // Accessors for the objects handed to the constructor
        // (NOTE(review): presumably return p_kernel / p_device / p_function).
        Kernel *kernel() const;
        CPUDevice *device() const;

        llvm::Function *function() const;

        /**
         * \param freeLocal vector of pointers taken by non-const reference
         *        (NOTE(review): presumably filled with allocations the caller
         *        must release afterwards — confirm in the implementation)
         * \return an llvm::Function
         *         (NOTE(review): presumably the cached p_call_function,
         *         created under p_call_function_mutex — confirm)
         */
        llvm::Function *callFunction(std::vector<void *> &freeLocal);

    private:
        // Copying is disabled: declared private and intentionally left
        // undefined, because a copied pthread_mutex_t must not be used.
        CPUKernel(const CPUKernel &);
        CPUKernel &operator=(const CPUKernel &);

        CPUDevice *p_device;
        Kernel *p_kernel;
        llvm::Function *p_function, *p_call_function;
        pthread_mutex_t p_call_function_mutex;
};

// Forward declaration: CPUKernelWorkGroup refers to CPUKernelEvent by pointer
// before the class itself is defined further down in this header.
class CPUKernelEvent;

/**
 * \brief One work-group of a kernel event, created for a CPUKernel and
 *        executed through run().
 *
 * NOTE(review): the semantics described below are derived from the
 * declarations in this header only; confirm details in the implementation.
 */
class CPUKernelWorkGroup
{
    public:
        /**
         * \param kernel           CPU kernel this work-group belongs to
         * \param event            kernel event being executed
         * \param cpu_event        CPU-side bookkeeping object for \p event
         * \param work_group_index pointer to the work-group index
         *        (NOTE(review): presumably an array of MAX_WORK_DIMS entries,
         *        one per dimension — confirm against the caller)
         */
        CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event,
                           CPUKernelEvent *cpu_event,
                           const size_t *work_group_index);
        ~CPUKernelWorkGroup();

        /**
         * \return a success flag
         *         (NOTE(review): exact meaning of true/false is not visible
         *         from this header — confirm in the implementation)
         */
        bool run();

        // Native functions
        // NOTE(review): the names mirror the OpenCL C work-item built-ins
        // (get_global_id, get_work_dim, ...); presumably these are the host
        // implementations invoked by running kernels — confirm.
        // dimindx selects the dimension being queried.
        size_t getGlobalId(cl_uint dimindx) const;
        cl_uint getWorkDim() const;
        size_t getGlobalSize(cl_uint dimindx) const;
        size_t getLocalSize(cl_uint dimindx) const;
        size_t getLocalID(cl_uint dimindx) const;
        size_t getNumGroups(cl_uint dimindx) const;
        size_t getGroupID(cl_uint dimindx) const;
        size_t getGlobalOffset(cl_uint dimindx) const;
        void barrier(unsigned int flags);
        void *getImageData(Image2D *image, int x, int y, int z) const;

        // NOTE(review): presumably called when a kernel references a
        // built-in called `name` that cannot be resolved — confirm.
        void builtinNotFound(const std::string &name) const;

    private:
        CPUKernel *p_kernel;
        CPUKernelEvent *p_cpu_event;
        KernelEvent *p_event;
        cl_uint p_work_dim;
        // Per-dimension values, each array sized MAX_WORK_DIMS.
        size_t p_index[MAX_WORK_DIMS],
               p_max_local_id[MAX_WORK_DIMS],
               p_global_id_start_offset[MAX_WORK_DIMS];

        // Pointer to a function taking no arguments and returning nothing
        // (NOTE(review): presumably the compiled kernel entry point — confirm).
        void (*p_kernel_func_addr)();

        // Machinery to have barrier() working
        // Each Context stores a set of local ids together with a ucontext_t
        // and an "initialized" marker.
        // NOTE(review): presumably one Context exists per work-item so that
        // execution can switch between work-items at a barrier — confirm.
        struct Context
        {
            size_t local_id[MAX_WORK_DIMS];
            ucontext_t context;
            unsigned int initialized;
        };

        // Returns the address of the Context associated with `index`
        // (NOTE(review): presumably an offset into p_contexts — confirm).
        Context *getContextAddr(unsigned int index);

        Context *p_current_context;
        Context p_dummy_context;
        void *p_contexts;       // untyped storage; layout defined by the implementation
        size_t p_stack_size;
        unsigned int p_num_work_items, p_current_work_item;
        bool p_had_barrier;
};

/**
 * \brief CPU-side bookkeeping for a KernelEvent: hands out work-groups through
 *        reserve()/takeInstance() and counts the ones that have finished.
 *
 * The object embeds a pthread_mutex_t. POSIX leaves the result of operating on
 * a copy of a mutex undefined, so instances are made non-copyable below.
 */
class CPUKernelEvent
{
    public:
        /**
         * \param device CPU device the event runs on
         * \param event  kernel event being tracked
         */
        CPUKernelEvent(CPUDevice *device, KernelEvent *event);
        ~CPUKernelEvent();

        bool reserve();  /*!< The next Work Group that will execute will be the last. Locks the event */
        bool finished(); /*!< All the work groups have finished */
        CPUKernelWorkGroup *takeInstance(); /*!< Must be called exactly one time after reserve(). Unlocks the event */

        // NOTE(review): presumably called once per completed work-group to
        // advance p_finished_wg — confirm in the implementation.
        void workGroupFinished();

    private:
        // Copying is disabled: declared private and intentionally left
        // undefined, because a copied pthread_mutex_t must not be used.
        CPUKernelEvent(const CPUKernelEvent &);
        CPUKernelEvent &operator=(const CPUKernelEvent &);

        CPUDevice *p_device;
        KernelEvent *p_event;
        // Per-dimension work-group position and limits (MAX_WORK_DIMS entries each).
        size_t p_current_work_group[MAX_WORK_DIMS],
               p_max_work_groups[MAX_WORK_DIMS];
        // Work-group counters: current, finished and total
        // (NOTE(review): meaning taken from the member names — confirm).
        size_t p_current_wg, p_finished_wg, p_num_wg;
        // NOTE(review): presumably the lock that reserve() takes and
        // takeInstance() releases — confirm in the implementation.
        pthread_mutex_t p_mutex;
};

}

#endif