summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Steckelmacher <steckdenis@yahoo.fr>2011-08-11 19:34:05 +0200
committerDenis Steckelmacher <steckdenis@yahoo.fr>2011-08-11 19:34:05 +0200
commitd138ae4dc14621d01e6d9a25491b7c2bbf3c8c09 (patch)
tree93b7bd4b980e71a1fbf8fc7bef8e54958cf8bec4
parent4238667e0e492ad940ffc8a5e01e3b39f0d6b2e2 (diff)
Implement all image built-ins except read_image{f,i,ui}.
-rw-r--r--src/core/cpu/buffer.cpp1
-rw-r--r--src/core/cpu/builtins.cpp80
-rw-r--r--src/core/cpu/builtins.h4
-rw-r--r--src/core/cpu/device.cpp1
-rw-r--r--src/core/cpu/device.h13
-rw-r--r--src/core/cpu/kernel.cpp20
-rw-r--r--src/core/cpu/kernel.h2
-rw-r--r--src/core/kernel.cpp6
-rw-r--r--src/core/kernel.h2
-rw-r--r--src/runtime/stdlib.c420
-rw-r--r--src/runtime/stdlib.h72
-rw-r--r--tests/test_builtins.cpp91
12 files changed, 685 insertions, 27 deletions
diff --git a/src/core/cpu/buffer.cpp b/src/core/cpu/buffer.cpp
index a2a2d97..a9b878d 100644
--- a/src/core/cpu/buffer.cpp
+++ b/src/core/cpu/buffer.cpp
@@ -5,6 +5,7 @@
#include <cstdlib>
#include <cstring>
+#include <iostream>
using namespace Coal;
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 64eb945..2399acc 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -1,7 +1,9 @@
#include "builtins.h"
#include "kernel.h"
+#include "buffer.h"
#include "../events.h"
+#include "../memobject.h"
#include <sys/mman.h>
#include <signal.h>
@@ -10,8 +12,23 @@
#include <iostream>
#include <cstring>
+#include <stdio.h>
+
using namespace Coal;
+unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
+ size_t row_pitch, size_t slice_pitch,
+ unsigned int bytes_per_pixel)
+{
+ unsigned char *result = base;
+
+ result += (z * slice_pitch) +
+ (y * row_pitch) +
+ (x * bytes_per_pixel);
+
+ return result;
+}
+
/*
* TLS-related functions
*/
@@ -190,6 +207,18 @@ void CPUKernelWorkGroup::barrier(unsigned int flags)
// a barrier and that we returned to this one. We can continue.
}
+void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
+{
+ CPUBuffer *buffer =
+ (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device());
+
+ return imageData((unsigned char *)buffer->data(),
+ x, y, z,
+ image->row_pitch(),
+ image->slice_pitch(),
+ image->pixel_size());
+}
+
void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
{
std::cout << "OpenCL: Non-existant builtin function " << name
@@ -246,6 +275,41 @@ static void barrier(unsigned int flags)
g_work_group->barrier(flags);
}
+// Images
+
+int get_image_width(Image2D *image)
+{
+ return image->width();
+}
+
+int get_image_height(Image2D *image)
+{
+ return image->height();
+}
+
+int get_image_depth(Image3D *image)
+{
+ return image->depth();
+}
+
+int get_image_channel_data_type(Image2D *image)
+{
+ return image->format().image_channel_data_type;
+}
+
+int get_image_channel_order(Image2D *image)
+{
+ return image->format().image_channel_order;
+}
+
+void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
+{
+ *order = image->format().image_channel_order;
+ *type = image->format().image_channel_data_type;
+
+ return g_work_group->getImageData(image, x, y, z);
+}
+
/*
* Bridge between LLVM and us
*/
@@ -274,6 +338,22 @@ void *getBuiltin(const std::string &name)
else if (name == "barrier")
return (void *)&barrier;
+ else if (name == "__cpu_get_image_width")
+ return (void *)&get_image_width;
+ else if (name == "__cpu_get_image_height")
+ return (void *)&get_image_height;
+ else if (name == "__cpu_get_image_depth")
+ return (void *)&get_image_depth;
+ else if (name == "__cpu_get_image_channel_data_type")
+ return (void *)&get_image_channel_data_type;
+ else if (name == "__cpu_get_image_channel_order")
+ return (void *)&get_image_channel_order;
+ else if (name == "__cpu_image_data")
+ return (void *)&image_data;
+
+ else if (name == "debug")
+ return (void *)&printf;
+
// Function not found
g_work_group->builtinNotFound(name);
diff --git a/src/core/cpu/builtins.h b/src/core/cpu/builtins.h
index 1a703fb..f01c288 100644
--- a/src/core/cpu/builtins.h
+++ b/src/core/cpu/builtins.h
@@ -36,4 +36,8 @@ bool incVec(unsigned long dims, T *vec, T *maxs)
return overflow;
}
+unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
+ size_t row_pitch, size_t slice_pitch,
+ unsigned int bytes_per_pixel);
+
#endif \ No newline at end of file
diff --git a/src/core/cpu/device.cpp b/src/core/cpu/device.cpp
index 10cd037..4c30f03 100644
--- a/src/core/cpu/device.cpp
+++ b/src/core/cpu/device.cpp
@@ -3,6 +3,7 @@
#include "kernel.h"
#include "program.h"
#include "worker.h"
+#include "builtins.h"
#include "config.h"
#include "../propertylist.h"
diff --git a/src/core/cpu/device.h b/src/core/cpu/device.h
index 52128f1..fd38ff7 100644
--- a/src/core/cpu/device.h
+++ b/src/core/cpu/device.h
@@ -6,19 +6,6 @@
#include <pthread.h>
#include <list>
-static unsigned char *imageData(unsigned char *base, size_t x, size_t y,
- size_t z, size_t row_pitch, size_t slice_pitch,
- unsigned int bytes_per_pixel)
-{
- unsigned char *result = base;
-
- result += (z * slice_pitch) +
- (y * row_pitch) +
- (x * bytes_per_pixel);
-
- return result;
-}
-
namespace Coal
{
diff --git a/src/core/cpu/kernel.cpp b/src/core/cpu/kernel.cpp
index 20cb0d6..33e0cdd 100644
--- a/src/core/cpu/kernel.cpp
+++ b/src/core/cpu/kernel.cpp
@@ -28,14 +28,14 @@ using namespace Coal;
static llvm::Constant *getPointerConstant(llvm::LLVMContext &C,
llvm::Type *type,
- void *const *value)
+ void *value)
{
llvm::Constant *rs = 0;
if (sizeof(void *) == 4)
- rs = llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), *(uint32_t *)value);
+ rs = llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), (uint64_t)value);
else
- rs = llvm::ConstantInt::get(llvm::Type::getInt64Ty(C), *(uint64_t *)value);
+ rs = llvm::ConstantInt::get(llvm::Type::getInt64Ty(C), (uint64_t)value);
// Cast to kernel's pointer type
rs = llvm::ConstantExpr::getIntToPtr(rs, type);
@@ -223,7 +223,7 @@ llvm::Function *CPUKernel::callFunction(std::vector<void *> &freeLocal)
void *local_buffer = std::malloc(a.allocAtKernelRuntime());
C = getPointerConstant(stub->getContext(),
k_func_type->getParamType(i),
- &local_buffer);
+ local_buffer);
freeLocal.push_back(local_buffer);
}
@@ -243,14 +243,13 @@ llvm::Function *CPUKernel::callFunction(std::vector<void *> &freeLocal)
(CPUBuffer *)buffer->deviceBuffer(p_device);
void *buf_ptr = 0;
- if (!cpubuf->allocated())
- cpubuf->allocate();
+ buffer->allocate(p_device);
buf_ptr = cpubuf->data();
C = getPointerConstant(stub->getContext(),
k_func_type->getParamType(i),
- &buf_ptr);
+ buf_ptr);
}
}
@@ -259,12 +258,17 @@ llvm::Function *CPUKernel::callFunction(std::vector<void *> &freeLocal)
case Kernel::Arg::Image2D:
case Kernel::Arg::Image3D:
+ {
+ Image2D *image = *(Image2D **)value;
+ image->allocate(p_device);
+
// Assign a pointer to the image object, the intrinsic functions
// will handle them
C = getPointerConstant(stub->getContext(),
k_func_type->getParamType(i),
- (void **)value);
+ (void *)image);
break;
+ }
default:
break;
diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h
index 4abc38d..14607bf 100644
--- a/src/core/cpu/kernel.h
+++ b/src/core/cpu/kernel.h
@@ -22,6 +22,7 @@ namespace Coal
class CPUDevice;
class Kernel;
class KernelEvent;
+class Image2D;
class CPUKernel : public DeviceKernel
{
@@ -71,6 +72,7 @@ class CPUKernelWorkGroup
size_t getGroupID(cl_uint dimindx) const;
size_t getGlobalOffset(cl_uint dimindx) const;
void barrier(unsigned int flags);
+ void *getImageData(Image2D *image, int x, int y, int z) const;
void builtinNotFound(const std::string &name) const;
diff --git a/src/core/kernel.cpp b/src/core/kernel.cpp
index f5511bf..0c81937 100644
--- a/src/core/kernel.cpp
+++ b/src/core/kernel.cpp
@@ -111,14 +111,14 @@ cl_int Kernel::addFunction(DeviceInterface *device, llvm::Function *function,
{
llvm::StructType *struct_type =
llvm::cast<llvm::StructType>(arg_type);
- llvm::StringRef struct_name = struct_type->getName();
+ std::string struct_name = struct_type->getName().str();
- if (struct_name == "image2d")
+ if (struct_name.compare(0, 14, "struct.image2d") == 0)
{
kind = Arg::Image2D;
file = Arg::Global;
}
- else if (struct_name == "image3d")
+ else if (struct_name.compare(0, 14, "struct.image3d") == 0)
{
kind = Arg::Image3D;
file = Arg::Global;
diff --git a/src/core/kernel.h b/src/core/kernel.h
index 5bb8988..84b2cf5 100644
--- a/src/core/kernel.h
+++ b/src/core/kernel.h
@@ -123,7 +123,7 @@ class Kernel : public Object
}
-class _cl_kernel : public Coal::Kernel
+struct _cl_kernel : public Coal::Kernel
{};
#endif
diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c
index bacce9f..2b12727 100644
--- a/src/runtime/stdlib.c
+++ b/src/runtime/stdlib.c
@@ -1,5 +1,425 @@
#include "stdlib.h"
+int debug(const char *format, ...);
+
/* WARNING: Due to some device-specific things in stdlib.h, the bitcode stdlib
* must only be used by CPUDevice, as it's targeted to the host CPU at Clover's
* compilation! */
+
+/*
+ * Image functions
+ */
+
+int __cpu_get_image_width(void *image);
+int __cpu_get_image_height(void *image);
+int __cpu_get_image_depth(void *image);
+int __cpu_get_image_channel_data_type(void *image);
+int __cpu_get_image_channel_order(void *image);
+void *__cpu_image_data(void *image, int x, int y, int z, int *order, int *type);
+
+float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord)
+{
+
+}
+
+float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord)
+{
+
+}
+
+float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord)
+{
+
+}
+
+float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord)
+{
+
+}
+
+int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord)
+{
+
+}
+
+int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord)
+{
+
+}
+
+int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord)
+{
+
+}
+
+int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord)
+{
+
+}
+
+uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord)
+{
+
+}
+
+uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord)
+{
+
+}
+
+uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord)
+{
+
+}
+
+uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord)
+{
+
+}
+
+void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color)
+{
+ int4 c;
+ c.xy = coord;
+ c.zw = 0;
+
+ write_imagef((image3d_t)image, c, color);
+}
+
+void OVERLOAD write_imagef(image3d_t image, int4 coord, float4 color)
+{
+ int order, type;
+ void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+#define SWIZZLE(order, target, data) \
+ switch (order) \
+ { \
+ case CLK_R: \
+ case CLK_Rx: \
+ (*target).x = data.x; \
+ break; \
+ case CLK_A: \
+ (*target).x = data.w; \
+ break; \
+ case CLK_RG: \
+ case CLK_RGx: \
+ (*target).xy = data.xy; \
+ break; \
+ case CLK_RA: \
+ (*target).xy = data.xw; \
+ break; \
+ case CLK_RGBA: \
+ *target = data; \
+ break; \
+ case CLK_BGRA: \
+ (*target).zyxw = data.xyzw; \
+ break; \
+ case CLK_ARGB: \
+ (*target).wxyz = data.xyzw; \
+ break; \
+ case CLK_INTENSITY: \
+ case CLK_LUMINANCE: \
+ (*target).x = data.x; \
+ break; \
+ }
+
+ switch (type)
+ {
+ case CLK_SNORM_INT8:
+ {
+ char4 *target = v_target;
+ char4 data;
+
+ // Denormalize
+ data.x = (color.x * 127.0f);
+ data.y = (color.y * 127.0f);
+ data.z = (color.z * 127.0f);
+ data.w = (color.w * 127.0f);
+
+ SWIZZLE(order, target, data)
+ break;
+ }
+ case CLK_UNORM_INT8:
+ {
+ uchar4 *target = v_target;
+ uchar4 data;
+
+ // Denormalize
+ data.x = (color.x * 255.0f);
+ data.y = (color.y * 255.0f);
+ data.z = (color.z * 255.0f);
+ data.w = (color.w * 255.0f);
+
+ SWIZZLE(order, target, data)
+ break;
+ }
+ case CLK_SNORM_INT16:
+ {
+ short4 *target = v_target;
+ short4 data;
+
+ // Denormalize
+ data.x = (color.x * 127.0f);
+ data.y = (color.y * 127.0f);
+ data.z = (color.z * 127.0f);
+ data.w = (color.w * 127.0f);
+
+ SWIZZLE(order, target, data)
+ break;
+ }
+ case CLK_UNORM_INT16:
+ {
+ ushort4 *target = v_target;
+ ushort4 data;
+
+ data.x = (color.x * 255.0f);
+ data.y = (color.y * 255.0f);
+ data.z = (color.z * 255.0f);
+ data.w = (color.w * 255.0f);
+
+ SWIZZLE(order, target, data)
+ break;
+ }
+ case CLK_FLOAT:
+ {
+ float4 *target = v_target;
+
+ SWIZZLE(order, target, color)
+ break;
+ }
+ }
+
+#undef SWIZZLE
+}
+
+#define SWIZZLE_8(target, data) \
+ case CLK_ARGB: \
+ (*target).wxyz = data.xyzw; \
+ break; \
+ case CLK_BGRA: \
+ (*target).zyxw = data.xyzw; \
+ break;
+
+#define SWIZZLE_16(target, data) \
+ case CLK_LUMINANCE: \
+ case CLK_INTENSITY: \
+ (*target).x = data.x; \
+ break;
+
+#define SWIZZLE_32(target, data) \
+ case CLK_R: \
+ case CLK_Rx: \
+ (*target).x = data.x; \
+ break; \
+ case CLK_A: \
+ (*target).x = data.w; \
+ break; \
+ case CLK_RG: \
+ case CLK_RGx: \
+ (*target).xy = data.xy; \
+ break; \
+ case CLK_RA: \
+ (*target).xy = data.xw; \
+ break; \
+ case CLK_RGBA: \
+ *target = data; \
+ break;
+
+void OVERLOAD write_imagei(image2d_t image, int2 coord, int4 color)
+{
+ int4 c;
+ c.xy = coord;
+ c.zw = 0;
+
+ write_imagei((image3d_t)image, c, color);
+}
+
+void OVERLOAD write_imagei(image3d_t image, int4 coord, int4 color)
+{
+ int order, type;
+ void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+ switch (type)
+ {
+ case CLK_SIGNED_INT8:
+ {
+ char4 *target = v_target;
+ char4 data;
+
+ data.x = color.x;
+ data.y = color.y;
+ data.z = color.z;
+ data.w = color.w;
+
+ switch (order)
+ {
+ SWIZZLE_8(target, data)
+ SWIZZLE_16(target, data)
+ SWIZZLE_32(target, data)
+ }
+
+ break;
+ }
+ case CLK_SIGNED_INT16:
+ {
+ short4 *target = v_target;
+ short4 data;
+
+ data.x = color.x;
+ data.y = color.y;
+ data.z = color.z;
+ data.w = color.w;
+
+ switch (order)
+ {
+ SWIZZLE_16(target, data)
+ SWIZZLE_32(target, data)
+ }
+
+ break;
+ }
+ case CLK_SIGNED_INT32:
+ {
+ int4 *target = v_target;
+
+ switch (order)
+ {
+ SWIZZLE_32(target, color)
+ }
+
+ break;
+ }
+ }
+}
+
+void OVERLOAD write_imageui(image2d_t image, int2 coord, uint4 color)
+{
+ int4 c;
+ c.xy = coord;
+ c.zw = 0;
+
+ write_imageui((image3d_t)image, c, color);
+}
+
+void OVERLOAD write_imageui(image3d_t image, int4 coord, uint4 color)
+{
+ int order, type;
+ void *v_target = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+
+ switch (type)
+ {
+ case CLK_UNSIGNED_INT8:
+ {
+ uchar4 *target = v_target;
+ uchar4 data;
+
+ data.x = color.x;
+ data.y = color.y;
+ data.z = color.z;
+ data.w = color.w;
+
+ switch (order)
+ {
+ SWIZZLE_8(target, data)
+ SWIZZLE_16(target, data)
+ SWIZZLE_32(target, data)
+ }
+ }
+ case CLK_UNSIGNED_INT16:
+ {
+ ushort4 *target = v_target;
+ ushort4 data;
+
+ data.x = color.x;
+ data.y = color.y;
+ data.z = color.z;
+ data.w = color.w;
+
+ switch (order)
+ {
+ SWIZZLE_16(target, data)
+ SWIZZLE_32(target, data)
+ }
+ }
+ case CLK_UNSIGNED_INT32:
+ {
+ uint4 *target = v_target;
+
+ switch (order)
+ {
+ SWIZZLE_32(target, color)
+ }
+ }
+ }
+}
+
+#undef SWIZZLE_8
+#undef SWIZZLE_16
+#undef SWIZZLE_32
+
+int2 OVERLOAD get_image_dim(image2d_t image)
+{
+ int2 result;
+
+ result.x = get_image_width(image);
+ result.y = get_image_height(image);
+
+ return result;
+}
+
+int4 OVERLOAD get_image_dim(image3d_t image)
+{
+ int4 result;
+
+ result.x = get_image_width(image);
+ result.y = get_image_height(image);
+ result.z = get_image_depth(image);
+
+ return result;
+}
+
+int OVERLOAD get_image_width(image2d_t image)
+{
+ return __cpu_get_image_width(image);
+}
+
+int OVERLOAD get_image_width(image3d_t image)
+{
+ return __cpu_get_image_width(image);
+}
+
+int OVERLOAD get_image_height(image2d_t image)
+{
+ return __cpu_get_image_height(image);
+}
+
+int OVERLOAD get_image_height(image3d_t image)
+{
+ return __cpu_get_image_height(image);
+}
+
+int OVERLOAD get_image_depth(image3d_t image)
+{
+ return __cpu_get_image_depth(image);
+}
+
+int OVERLOAD get_image_channel_data_type(image2d_t image)
+{
+ return __cpu_get_image_channel_data_type(image);
+}
+
+int OVERLOAD get_image_channel_data_type(image3d_t image)
+{
+ return __cpu_get_image_channel_data_type(image);
+}
+
+int OVERLOAD get_image_channel_order(image2d_t image)
+{
+ return __cpu_get_image_channel_order(image);
+}
+
+int OVERLOAD get_image_channel_order(image3d_t image)
+{
+ return __cpu_get_image_channel_order(image);
+}
+
diff --git a/src/runtime/stdlib.h b/src/runtime/stdlib.h
index 527d5ac..5723909 100644
--- a/src/runtime/stdlib.h
+++ b/src/runtime/stdlib.h
@@ -8,6 +8,8 @@ typedef int *intptr_t;
typedef uint *uintptr_t;
typedef unsigned int sampler_t;
+typedef struct image2d *image2d_t;
+typedef struct image3d *image3d_t;
/* Standard types from Clang's stddef, Copyright (C) 2008 Eli Friedman */
typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t;
@@ -19,7 +21,9 @@ typedef __typeof__(sizeof(int)) size_t;
#define COAL_VECTOR_SET(type) \
COAL_VECTOR(type, 2); \
COAL_VECTOR(type, 3); \
- COAL_VECTOR(type, 4)
+ COAL_VECTOR(type, 4); \
+ COAL_VECTOR(type, 8); \
+ COAL_VECTOR(type, 16);
COAL_VECTOR_SET(char);
COAL_VECTOR_SET(uchar);
@@ -56,6 +60,8 @@ COAL_VECTOR_SET(float);
#define read_only __read_only
/* Defines */
+#define OVERLOAD __attribute__((overloadable))
+
#define CLK_NORMALIZED_COORDS_FALSE 0x00000000
#define CLK_NORMALIZED_COORDS_TRUE 0x00000001
#define CLK_ADDRESS_NONE 0x00000000
@@ -69,6 +75,36 @@ COAL_VECTOR_SET(float);
#define CLK_LOCAL_MEM_FENCE 0x00000001
#define CLK_GLOBAL_MEM_FENCE 0x00000002
+#define CLK_R 0x10B0
+#define CLK_A 0x10B1
+#define CLK_RG 0x10B2
+#define CLK_RA 0x10B3
+#define CLK_RGB 0x10B4
+#define CLK_RGBA 0x10B5
+#define CLK_BGRA 0x10B6
+#define CLK_ARGB 0x10B7
+#define CLK_INTENSITY 0x10B8
+#define CLK_LUMINANCE 0x10B9
+#define CLK_Rx 0x10BA
+#define CLK_RGx 0x10BB
+#define CLK_RGBx 0x10BC
+
+#define CLK_SNORM_INT8 0x10D0
+#define CLK_SNORM_INT16 0x10D1
+#define CLK_UNORM_INT8 0x10D2
+#define CLK_UNORM_INT16 0x10D3
+#define CLK_UNORM_SHORT_565 0x10D4
+#define CLK_UNORM_SHORT_555 0x10D5
+#define CLK_UNORM_INT_101010 0x10D6
+#define CLK_SIGNED_INT8 0x10D7
+#define CLK_SIGNED_INT16 0x10D8
+#define CLK_SIGNED_INT32 0x10D9
+#define CLK_UNSIGNED_INT8 0x10DA
+#define CLK_UNSIGNED_INT16 0x10DB
+#define CLK_UNSIGNED_INT32 0x10DC
+#define CLK_HALF_FLOAT 0x10DD
+#define CLK_FLOAT 0x10DE
+
/* Typedefs */
typedef unsigned int cl_mem_fence_flags;
@@ -83,3 +119,37 @@ size_t get_group_id(uint dimindx);
size_t get_global_offset(uint dimindx);
void barrier(cl_mem_fence_flags flags);
+
+/* Image functions */
+float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord);
+float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord);
+float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord);
+float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord);
+int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord);
+int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord);
+int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord);
+int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord);
+uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord);
+uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord);
+uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord);
+uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord);
+
+void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color);
+void OVERLOAD write_imagef(image3d_t image, int4 coord, float4 color);
+void OVERLOAD write_imagei(image2d_t image, int2 coord, int4 color);
+void OVERLOAD write_imagei(image3d_t image, int4 coord, int4 color);
+void OVERLOAD write_imageui(image2d_t image, int2 coord, uint4 color);
+void OVERLOAD write_imageui(image3d_t image, int4 coord, uint4 color);
+
+int2 OVERLOAD get_image_dim(image2d_t image);
+int4 OVERLOAD get_image_dim(image3d_t image);
+int OVERLOAD get_image_width(image2d_t image);
+int OVERLOAD get_image_width(image3d_t image);
+int OVERLOAD get_image_height(image2d_t image);
+int OVERLOAD get_image_height(image3d_t image);
+int OVERLOAD get_image_depth(image3d_t image);
+
+int OVERLOAD get_image_channel_data_type(image2d_t image);
+int OVERLOAD get_image_channel_data_type(image3d_t image);
+int OVERLOAD get_image_channel_order(image2d_t image);
+int OVERLOAD get_image_channel_order(image3d_t image);
diff --git a/tests/test_builtins.cpp b/tests/test_builtins.cpp
index acb39ca..a901f22 100644
--- a/tests/test_builtins.cpp
+++ b/tests/test_builtins.cpp
@@ -22,11 +22,43 @@ const char barrier_source[] =
" *rs += 1;\n"
"}\n";
+const char image_source[] =
+ "__kernel void test_case(__global uint *rs, __write_only image2d_t image1,\n"
+ " __write_only image2d_t image2) {\n"
+ " float4 fcolor;\n"
+ " int4 scolor;\n"
+ " int2 coord;\n"
+ "\n"
+ " if (get_image_width(image1) != 4) *rs = 1;\n"
+ " if (get_image_height(image1) != 4) *rs = 2;\n"
+ " if (get_image_channel_data_type(image2) != CLK_SIGNED_INT16) *rs = 3;\n"
+ " if (get_image_channel_order(image2) != CLK_RGBA) *rs = 4;\n"
+ "\n"
+ " if (*rs != 0) return;\n"
+ "\n"
+ " fcolor.x = 1.0f;\n"
+ " fcolor.y = 0.5f;\n"
+ " fcolor.z = 0.0f;\n"
+ " fcolor.w = 1.0f;\n"
+ "\n"
+ " scolor.x = -3057;\n"
+ " scolor.y = 65;\n"
+ " scolor.z = 0;\n"
+ " scolor.w = 32767;\n"
+ "\n"
+ " coord.x = 3;\n"
+ " coord.y = 1;\n"
+ "\n"
+ " write_imagef(image1, coord, fcolor);\n"
+ " write_imagei(image2, coord, scolor);\n"
+ "}\n";
+
enum TestCaseKind
{
NormalKind,
SamplerKind,
- BarrierKind
+ BarrierKind,
+ ImageKind
};
/*
@@ -48,6 +80,8 @@ static uint32_t run_kernel(const char *source, TestCaseKind kind)
cl_mem rs_buf;
cl_sampler sampler;
+ cl_mem mem1, mem2;
+ cl_image_format fmt;
uint32_t rs = 0;
@@ -105,6 +139,25 @@ static uint32_t run_kernel(const char *source, TestCaseKind kind)
if (result != CL_SUCCESS) return 65547;
break;
+ case ImageKind:
+ fmt.image_channel_data_type = CL_SNORM_INT8;
+ fmt.image_channel_order = CL_RGBA;
+
+ mem1 = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, 4, 4, 0, 0, &result);
+ if (result != CL_SUCCESS) return 65548;
+
+ fmt.image_channel_data_type = CL_SIGNED_INT16;
+
+ mem2 = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, 4, 4, 0, 0, &result);
+ if (result != CL_SUCCESS) return 65548;
+
+ result = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem1);
+ if (result != CL_SUCCESS) return 65549;
+
+ result = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem2);
+ if (result != CL_SUCCESS) return 65549;
+ break;
+
default:
break;
}
@@ -128,6 +181,7 @@ static uint32_t run_kernel(const char *source, TestCaseKind kind)
if (result != CL_SUCCESS) return 65545;
if (kind == SamplerKind) clReleaseSampler(sampler);
+ if (kind == ImageKind) clReleaseMemObject(mem1), clReleaseMemObject(mem2);
clReleaseEvent(event);
clReleaseMemObject(rs_buf);
clReleaseKernel(kernel);
@@ -168,6 +222,10 @@ static const char *default_error(uint32_t errcode)
return "Cannot create a sampler";
case 65547:
return "Cannot set a sampler kernel argument";
+ case 65548:
+ return "Cannot create an Image2D object";
+ case 65549:
+ return "Cannot set image kernel argument";
default:
return "Unknown error code";
@@ -206,11 +264,42 @@ START_TEST (test_barrier)
}
END_TEST
+START_TEST (test_image)
+{
+ uint32_t rs = run_kernel(image_source, ImageKind);
+ const char *errstr = 0;
+
+ switch (rs)
+ {
+ case 1:
+ errstr = "Image1 must have width of 4";
+ break;
+ case 2:
+ errstr = "Image1 must have width of 4";
+ break;
+ case 3:
+ errstr = "Image2 must have type SIGNED_FLOAT16";
+ break;
+ case 4:
+ errstr = "Image2 must have channel order RGBA";
+ break;
+ default:
+ errstr = default_error(rs);
+ }
+
+ fail_if(
+ errstr != 0,
+ errstr
+ );
+}
+END_TEST
+
TCase *cl_builtins_tcase_create(void)
{
TCase *tc = NULL;
tc = tcase_create("builtins");
tcase_add_test(tc, test_sampler);
tcase_add_test(tc, test_barrier);
+ tcase_add_test(tc, test_image);
return tc;
}