summary refs log tree commit diff
diff options
context:
space:
mode:
author Denis Steckelmacher <steckdenis@yahoo.fr> 2011-08-20 15:56:26 +0200
committer Denis Steckelmacher <steckdenis@yahoo.fr> 2011-08-20 15:56:26 +0200
commit beb531f333d3838e7a4690706a43293c4cee45ef (patch)
tree 5cf0e031516c8223b3bfcf703d3f82a74e349867
parent 97917f6c1898a6eeb68c54ed640263bb435fc0ec (diff)
Natively re-implement image reading functions taking integer coordinates.
-rw-r--r-- src/core/cpu/builtins.cpp 24
-rw-r--r-- src/core/cpu/kernel.h 7
-rw-r--r-- src/core/cpu/sampler.cpp 315
-rw-r--r-- src/core/memobject.cpp 27
-rw-r--r-- src/core/memobject.h 9
-rw-r--r-- src/runtime/stdlib.c 370
6 files changed, 357 insertions, 395 deletions
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp
index 081717c..265ba1f 100644
--- a/src/core/cpu/builtins.cpp
+++ b/src/core/cpu/builtins.cpp
@@ -353,6 +353,24 @@ static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color)
g_work_group->writeImage(image, x, y, z, color);
}
+/*
+ * Builtin returned by getBuiltin() for the name "__cpu_read_imagefi".
+ * Forwards an integer-coordinate read to the float overload of
+ * g_work_group->readImage(), which stores the pixel in result.
+ */
+static void read_imagefi(float *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/*
+ * Builtin returned by getBuiltin() for the name "__cpu_read_imageii".
+ * Forwards an integer-coordinate read to the int32_t overload of
+ * g_work_group->readImage(), which stores the pixel in result.
+ */
+static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
+/*
+ * Builtin returned by getBuiltin() for the name "__cpu_read_imageuii".
+ * Forwards an integer-coordinate read to the uint32_t overload of
+ * g_work_group->readImage(), which stores the pixel in result.
+ */
+static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler)
+{
+ g_work_group->readImage(result, image, x, y, z, sampler);
+}
+
/*
* Bridge between LLVM and us
*/
@@ -401,6 +419,12 @@ void *getBuiltin(const std::string &name)
return (void *)&write_imagei;
else if (name == "__cpu_write_imageui")
return (void *)&write_imageui;
+ else if (name == "__cpu_read_imagefi")
+ return (void *)&read_imagefi;
+ else if (name == "__cpu_read_imageii")
+ return (void *)&read_imageii;
+ else if (name == "__cpu_read_imageuii")
+ return (void *)&read_imageuii;
else if (name == "debug")
return (void *)&printf;
diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h
index dec42e6..cb3a296 100644
--- a/src/core/cpu/kernel.h
+++ b/src/core/cpu/kernel.h
@@ -109,6 +109,13 @@ class CPUKernelWorkGroup
void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const;
void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const;
+ void readImage(float *result, Image2D *image, int x, int y, int z,
+ int32_t sampler) const;
+ void readImage(int32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler) const;
+ void readImage(uint32_t *result, Image2D *image, int x, int y, int z,
+ int32_t sampler) const;
+
void builtinNotFound(const std::string &name) const;
private:
diff --git a/src/core/cpu/sampler.cpp b/src/core/cpu/sampler.cpp
index c173004..58b3bd3 100644
--- a/src/core/cpu/sampler.cpp
+++ b/src/core/cpu/sampler.cpp
@@ -35,6 +35,7 @@
*/
#include "../memobject.h"
+#include "../sampler.h"
#include "kernel.h"
#include "buffer.h"
#include "builtins.h"
@@ -45,6 +46,39 @@
using namespace Coal;
/*
+ * Helper functions
+ */
+
+/*
+ * Limit a to the inclusive range [b, c]: returns b when a < b, c when
+ * a > c, and a itself otherwise.
+ */
+static int clamp(int a, int b, int c)
+{
+ return (a < b) ? b : ((a > c) ? c : a);
+}
+
+/*
+ * Apply the addressing mode stored in bits 0xf0 of the sampler to the integer
+ * coordinates x, y and z, which are modified in place.
+ *
+ * CLK_ADDRESS_CLAMP_TO_EDGE clamps each coordinate to [0, size - 1].
+ * CLK_ADDRESS_CLAMP clamps each coordinate to [-1, size], so a coordinate
+ * that lies outside of the image on either side stays outside of it.
+ * z is only clamped for 3D images; a 2D image is treated as having depth 1.
+ *
+ * Returns true when the resulting coordinates are outside of the image: the
+ * caller must then return the border color and must not touch the image
+ * data. This also covers out-of-range coordinates used with any other
+ * addressing mode, which would otherwise be read outside of the image.
+ */
+static bool handle_address_mode(Image2D *image, int &x, int &y, int &z,
+ uint32_t sampler)
+{
+ bool is_3d = (image->type() == MemObject::Image3D);
+ int w = image->width(),
+ h = image->height(),
+ d = (is_3d ? ((Image3D *)image)->depth() : 1);
+
+ if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE)
+ {
+ x = clamp(x, 0, w - 1);
+ y = clamp(y, 0, h - 1);
+ if (is_3d) z = clamp(z, 0, d - 1);
+ }
+ else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP)
+ {
+ // The lower bound is -1, not 0: clamping a negative coordinate to 0
+ // would silently read the first texel instead of the border color.
+ x = clamp(x, -1, w);
+ y = clamp(y, -1, h);
+ if (is_3d) z = clamp(z, -1, d);
+ }
+
+ return (x < 0 || x >= w || y < 0 || y >= h || z < 0 || z >= d);
+}
+
+/*
* Macros or functions used to accelerate the functions
*/
#ifndef __has_builtin
@@ -60,70 +94,139 @@ static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b,
rs[3] = (w < 4 ? a[w] : b[w - 4]);
}
-static void slow_convert_to_format4f(float *data, cl_channel_type type)
+/*
+ * Convert the first channels floats of data to the channel data type given
+ * by type and store them, one after the other, at dest.
+ *
+ * CL_FLOAT is copied unchanged. The normalized integer types are scaled by
+ * the largest value of the target type (127, 32767, 255, 65535). Any other
+ * type leaves dest untouched.
+ */
+static void slow_convert_to_format4f(void *dest, float *data,
+ cl_channel_type type, unsigned int channels)
{
// Convert always the four components of source to target
if (type == CL_FLOAT)
- return;
+ {
+ std::memcpy(dest, data, channels * sizeof(float));
+ return;
+ }
- // NOTE: We can read and write at the same time in data because
- // we always begin wy reading 4 bytes (float) and never write
- // more than 4 bytes, so no data is corrupted
- for (unsigned int i=0; i<3; ++i)
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SNORM_INT8:
+ // 127, not 128: 1.0f * 128 does not fit in an int8_t
+ ((int8_t *)dest)[i] = data[i] * 127.0f;
+ break;
+ case CL_SNORM_INT16:
+ ((int16_t *)dest)[i] = data[i] * 32767.0f;
+ break;
+ case CL_UNORM_INT8:
+ ((uint8_t *)dest)[i] = data[i] * 255.0f;
+ break;
+ case CL_UNORM_INT16:
+ ((uint16_t *)dest)[i] = data[i] * 65535.0f;
+ break;
+ }
+ }
+}
+
+/*
+ * Convert channels components stored at source with the channel data type
+ * given by type into floats written to data[0..channels-1].
+ *
+ * CL_FLOAT is copied unchanged. The normalized integer types are divided by
+ * the largest value of the stored type (127, 32767, 255, 65535), which is
+ * the inverse of the scaling applied by slow_convert_to_format4f. Any other
+ * type leaves data untouched.
+ */
+static void slow_convert_from_format4f(float *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+ // Floats are stored as-is: copy them and skip the conversion loop
+ if (type == CL_FLOAT)
+ {
+ std::memcpy(data, source, channels * sizeof(float));
+ return;
+ }
+
+ for (unsigned int i=0; i<channels; ++i)
{
switch (type)
{
case CL_SNORM_INT8:
- ((int8_t *)data)[i] = data[i] * 128.0f;
+ data[i] = (float)((int8_t *)source)[i] / 127.0f;
break;
case CL_SNORM_INT16:
- ((int16_t *)data)[i] = data[i] * 32767.0f;
+ data[i] = (float)((int16_t *)source)[i] / 32767.0f;
break;
case CL_UNORM_INT8:
- ((uint8_t *)data)[i] = data[i] * 256.0f;
+ data[i] = (float)((uint8_t *)source)[i] / 255.0f;
break;
case CL_UNORM_INT16:
- ((uint16_t *)data)[i] = data[i] * 65535.0f;
+ data[i] = (float)((uint16_t *)source)[i] / 65535.0f;
break;
}
}
}
-static void slow_convert_to_format4i(int *data, cl_channel_type type)
+/*
+ * Store the first channels integers of data at dest using the channel data
+ * type given by type.
+ *
+ * CL_SIGNED_INT32 is copied unchanged; CL_SIGNED_INT8 and CL_SIGNED_INT16
+ * are narrowed by a plain cast, without saturation. Any other type leaves
+ * dest untouched.
+ */
+static void slow_convert_to_format4i(void *dest, int *data,
+ cl_channel_type type, unsigned int channels)
{
// Convert always the four components of source to target
if (type == CL_SIGNED_INT32)
- return;
+ {
+ std::memcpy(dest, data, channels * sizeof(int32_t));
+ return;
+ }
- for (unsigned int i=0; i<3; ++i)
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_SIGNED_INT8:
+ ((int8_t *)dest)[i] = data[i];
+ break;
+ case CL_SIGNED_INT16:
+ ((int16_t *)dest)[i] = data[i];
+ break;
+ }
+ }
+}
+
+/*
+ * Read channels signed integers stored at source with the channel data type
+ * given by type and widen them into data[0..channels-1].
+ *
+ * CL_SIGNED_INT32 is copied unchanged; CL_SIGNED_INT8 and CL_SIGNED_INT16
+ * are sign-extended. Any other type leaves data untouched.
+ */
+static void slow_convert_from_format4i(int32_t *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+ // 32-bit integers are stored as-is: copy them and skip the loop
+ if (type == CL_SIGNED_INT32)
+ {
+ std::memcpy(data, source, channels * sizeof(int32_t));
+ return;
+ }
+
+ for (unsigned int i=0; i<channels; ++i)
{
switch (type)
{
case CL_SIGNED_INT8:
- ((int8_t *)data)[i] = data[i];
+ data[i] = ((int8_t *)source)[i];
break;
case CL_SIGNED_INT16:
- ((int16_t *)data)[i] = data[i];
+ data[i] = ((int16_t *)source)[i];
break;
}
}
}
-static void slow_convert_to_format4ui(uint32_t *data, cl_channel_type type)
+/*
+ * Store the first channels unsigned integers of data at dest using the
+ * channel data type given by type.
+ *
+ * CL_UNSIGNED_INT32 is copied unchanged; CL_UNSIGNED_INT8 and
+ * CL_UNSIGNED_INT16 are narrowed by a plain cast, without saturation. Any
+ * other type leaves dest untouched.
+ */
+static void slow_convert_to_format4ui(void *dest, uint32_t *data,
+ cl_channel_type type, unsigned int channels)
{
// Convert always the four components of source to target
if (type == CL_UNSIGNED_INT32)
- return;
+ {
+ std::memcpy(dest, data, channels * sizeof(uint32_t));
+ return;
+ }
- for (unsigned int i=0; i<3; ++i)
+ // Honor the channel count: a fixed count of 3 drops the fourth component
+ // of 4-channel pixels and writes past 1- and 2-channel pixels.
+ for (unsigned int i=0; i<channels; ++i)
{
switch (type)
{
case CL_UNSIGNED_INT8:
- ((uint8_t *)data)[i] = data[i];
+ ((uint8_t *)dest)[i] = data[i];
break;
case CL_UNSIGNED_INT16:
- ((uint16_t *)data)[i] = data[i];
+ ((uint16_t *)dest)[i] = data[i];
+ break;
+ }
+ }
+}
+
+static void slow_convert_from_format4ui(uint32_t *data, void *source,
+ cl_channel_type type, unsigned int channels)
+{
+ // Convert always the four components of source to target
+ if (type == CL_UNSIGNED_INT32)
+ std::memcpy(data, source, channels * sizeof(uint32_t));
+
+ for (unsigned int i=0; i<channels; ++i)
+ {
+ switch (type)
+ {
+ case CL_UNSIGNED_INT8:
+ data[i] = ((uint8_t *)source)[i];
+ break;
+ case CL_UNSIGNED_INT16:
+ data[i] = ((uint16_t *)source)[i];
break;
}
}
@@ -138,14 +241,23 @@ static void slow_convert_to_format4ui(uint32_t *data, cl_channel_type type)
slow_shuffle4(rs, a, b, x, y, z, w)
#endif
- #define convert_to_format4f(data, type) \
- slow_convert_to_format4f(data, type)
+ #define convert_to_format4f(dest, data, type, channels) \
+ slow_convert_to_format4f(dest, data, type, channels)
+
+ #define convert_to_format4i(dest, data, type, channels) \
+ slow_convert_to_format4i(dest, data, type, channels)
+
+ #define convert_to_format4ui(data, source, type, channels) \
+ slow_convert_to_format4ui(data, source, type, channels)
+
+ #define convert_from_format4f(data, source, type, channels) \
+ slow_convert_from_format4f(data, source, type, channels)
- #define convert_to_format4i(data, type) \
- slow_convert_to_format4i(data, type)
+ #define convert_from_format4i(data, source, type, channels) \
+ slow_convert_from_format4i(data, source, type, channels)
- #define convert_to_format4ui(data, type) \
- slow_convert_to_format4ui(data, type)
+ #define convert_from_format4ui(data, source, type, channels) \
+ slow_convert_from_format4ui(data, source, type, channels)
static void swizzle(uint32_t *target, uint32_t *source,
cl_channel_order order, bool reading, uint32_t t_max)
@@ -250,14 +362,14 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
swizzle((uint32_t *)converted, (uint32_t *)color,
image->format().image_channel_order, false, 0);
- // Convert color to the correct format
- convert_to_format4f(converted, image->format().image_channel_data_type);
-
// Get a pointer in the image where to write the data
void *target = getImageData(image, x, y, z);
- // Copy the converted data to the image
- std::memcpy(target, converted, image->pixel_size());
+ // Convert color to the correct format
+ convert_to_format4f(target,
+ converted,
+ image->format().image_channel_data_type,
+ image->channels());
}
void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
@@ -270,14 +382,14 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
swizzle((uint32_t *)converted, (uint32_t *)color,
image->format().image_channel_order, false, 0);
- // Convert color to the correct format
- convert_to_format4i(converted, image->format().image_channel_data_type);
-
// Get a pointer in the image where to write the data
void *target = getImageData(image, x, y, z);
- // Copy the converted data to the image
- std::memcpy(target, converted, image->pixel_size());
+ // Convert color to the correct format
+ convert_to_format4i(target,
+ converted,
+ image->format().image_channel_data_type,
+ image->channels());
}
void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
@@ -287,15 +399,134 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
// Swizzle to the correct order (float, int and uint are 32-bit, so the
// type has no importance
- swizzle((uint32_t *)converted, (uint32_t *)color,
- image->format().image_channel_order, false, 0);
-
- // Convert color to the correct format
- convert_to_format4ui(converted, image->format().image_channel_data_type);
+ swizzle(converted, color, image->format().image_channel_order, false, 0);
// Get a pointer in the image where to write the data
void *target = getImageData(image, x, y, z);
- // Copy the converted data to the image
- std::memcpy(target, converted, image->pixel_size());
+ // Convert color to the correct format
+ convert_to_format4ui(target,
+ converted,
+ image->format().image_channel_data_type,
+ image->channels());
+}
+
+/*
+ * Read the pixel at integer coordinates (x, y, z) of image as four floats
+ * stored in result[0..3].
+ *
+ * When handle_address_mode() reports that the coordinates are outside of the
+ * image, the border color is returned instead: (0, 0, 0, 1) for the CL_R,
+ * CL_RG, CL_RGB and CL_LUMINANCE channel orders, (0, 0, 0, 0) for all the
+ * others. Otherwise the stored pixel is converted to floats and swizzled
+ * into result according to the channel order of the image.
+ */
+void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y,
+ int z, int32_t sampler) const
+{
+ // Handle the addressing mode of the sampler
+ if (handle_address_mode(image, x, y, z, sampler))
+ {
+ // Border color
+ result[0] = 0.0f;
+ result[1] = 0.0f;
+ result[2] = 0.0f;
+
+ switch (image->format().image_channel_order)
+ {
+ case CL_R:
+ case CL_RG:
+ case CL_RGB:
+ case CL_LUMINANCE:
+ result[3] = 1.0f;
+ break;
+ default:
+ result[3] = 0.0f;
+ }
+
+ return;
+ }
+
+ // Load the data from the image, converting it
+ void *source = getImageData(image, x, y, z);
+ float converted[4];
+
+ convert_from_format4f(converted,
+ source,
+ image->format().image_channel_data_type,
+ image->channels());
+
+ // Swizzle the pixel just read and place it in result
+ // swizzle() works on raw 32-bit words, so 1.0f is passed as its bit
+ // pattern (1065353216 == 0x3f800000)
+ swizzle((uint32_t *)result, (uint32_t *)converted,
+ image->format().image_channel_order, true, 1065353216 /* 1.0f */);
+}
+
+/*
+ * Read the pixel at integer coordinates (x, y, z) of image as four signed
+ * integers stored in result[0..3].
+ *
+ * When handle_address_mode() reports that the coordinates are outside of the
+ * image, the border color is returned instead: (0, 0, 0, 0x7fffffff) for the
+ * CL_R, CL_RG, CL_RGB and CL_LUMINANCE channel orders, (0, 0, 0, 0) for all
+ * the others. Otherwise the stored pixel is widened to 32-bit integers and
+ * swizzled into result according to the channel order of the image.
+ */
+void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y,
+ int z, int32_t sampler) const
+{
+ // Handle the addressing mode of the sampler
+ if (handle_address_mode(image, x, y, z, sampler))
+ {
+ // Border color
+ result[0] = 0;
+ result[1] = 0;
+ result[2] = 0;
+
+ switch (image->format().image_channel_order)
+ {
+ case CL_R:
+ case CL_RG:
+ case CL_RGB:
+ case CL_LUMINANCE:
+ result[3] = 0x7fffffff;
+ break;
+ default:
+ result[3] = 0;
+ }
+
+ return;
+ }
+
+ // Load the data from the image, converting it
+ void *source = getImageData(image, x, y, z);
+ int32_t converted[4];
+
+ convert_from_format4i(converted,
+ source,
+ image->format().image_channel_data_type,
+ image->channels());
+
+ // Swizzle the pixel just read and place it in result
+ swizzle((uint32_t *)result, (uint32_t *)converted,
+ image->format().image_channel_order, true, 0x7fffffff);
+}
+
+/*
+ * Read the pixel at integer coordinates (x, y, z) of image as four unsigned
+ * integers stored in result[0..3].
+ *
+ * When handle_address_mode() reports that the coordinates are outside of the
+ * image, the border color is returned instead: (0, 0, 0, 0xffffffff) for the
+ * CL_R, CL_RG, CL_RGB and CL_LUMINANCE channel orders, (0, 0, 0, 0) for all
+ * the others. Otherwise the stored pixel is widened to 32-bit integers and
+ * swizzled into result according to the channel order of the image.
+ */
+void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y,
+ int z, int32_t sampler) const
+{
+ // Handle the addressing mode of the sampler
+ if (handle_address_mode(image, x, y, z, sampler))
+ {
+ // Border color
+ result[0] = 0;
+ result[1] = 0;
+ result[2] = 0;
+
+ switch (image->format().image_channel_order)
+ {
+ case CL_R:
+ case CL_RG:
+ case CL_RGB:
+ case CL_LUMINANCE:
+ result[3] = 0xffffffff;
+ break;
+ default:
+ result[3] = 0;
+ }
+
+ return;
+ }
+
+ // Load the data from the image, converting it
+ void *source = getImageData(image, x, y, z);
+ uint32_t converted[4];
+
+ convert_from_format4ui(converted,
+ source,
+ image->format().image_channel_data_type,
+ image->channels());
+
+ // Swizzle the pixel just read and place it in result. The maximum value
+ // is the unsigned one (0xffffffff), the same as the border color above;
+ // 0x7fffffff is the maximum of the signed variant.
+ swizzle(result, converted, image->format().image_channel_order, true,
+ 0xffffffff);
} \ No newline at end of file
diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp
index 47e1141..613d403 100644
--- a/src/core/memobject.cpp
+++ b/src/core/memobject.cpp
@@ -713,10 +713,8 @@ size_t Image2D::element_size(const cl_image_format &format)
}
}
-size_t Image2D::pixel_size(const cl_image_format &format)
+unsigned int Image2D::channels(const cl_image_format &format)
{
- size_t multiplier;
-
switch (format.image_channel_order)
{
case CL_R:
@@ -724,30 +722,33 @@ size_t Image2D::pixel_size(const cl_image_format &format)
case CL_A:
case CL_INTENSITY:
case CL_LUMINANCE:
- multiplier = 1;
+ return 1;
break;
case CL_RG:
case CL_RGx:
case CL_RA:
- multiplier = 2;
+ return 2;
break;
case CL_RGBA:
case CL_ARGB:
case CL_BGRA:
- multiplier = 4;
+ return 4;
break;
case CL_RGBx:
case CL_RGB:
- multiplier = 0; // Only special data types allowed (565, 555, etc)
+ return 1; // Only special data types allowed (565, 555, etc)
break;
default:
return 0;
}
+}
+size_t Image2D::pixel_size(const cl_image_format &format)
+{
switch (format.image_channel_data_type)
{
case CL_UNORM_SHORT_565:
@@ -756,15 +757,25 @@ size_t Image2D::pixel_size(const cl_image_format &format)
case CL_UNORM_INT_101010:
return 4;
default:
- return multiplier * element_size(format);
+ return channels(format) * element_size(format);
}
}
+/*
+ * Size in bytes of one channel of this image: the static
+ * element_size(format) evaluated on the image's own format (p_format).
+ */
+size_t Image2D::element_size() const
+{
+ return element_size(p_format);
+}
+
size_t Image2D::pixel_size() const
{
return pixel_size(p_format);
}
+/*
+ * Number of channels of this image: the static channels(format) evaluated
+ * on the image's own format (p_format).
+ */
+unsigned int Image2D::channels() const
+{
+ return channels(p_format);
+}
+
/*
* Image3D
*/
diff --git a/src/core/memobject.h b/src/core/memobject.h
index 2a81a03..09f07e9 100644
--- a/src/core/memobject.h
+++ b/src/core/memobject.h
@@ -230,9 +230,12 @@ class Image2D : public MemObject
void *param_value,
size_t *param_value_size_ret) const;
- static size_t element_size(const cl_image_format &format); /*!< \brief Size in bytes of each channel of \p format */
- static size_t pixel_size(const cl_image_format &format); /*!< \brief Size in bytes of a pixel in \p format */
- size_t pixel_size() const; /*!< \brief Pixel size of this image */
+ static size_t element_size(const cl_image_format &format); /*!< \brief Size in bytes of each channel of \p format */
+ static unsigned int channels(const cl_image_format &format);/*!< \brief Number of channels of \p format */
+ static size_t pixel_size(const cl_image_format &format); /*!< \brief Size in bytes of a pixel in \p format */
+ size_t pixel_size() const; /*!< \brief Pixel size of this image */
+ size_t element_size() const; /*!< \brief Channel size of this image */
+ unsigned int channels() const; /*!< \brief Number of channels of this image */
private:
size_t p_width, p_height, p_row_pitch;
diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c
index cbb3ec8..4b6a0c2 100644
--- a/src/runtime/stdlib.c
+++ b/src/runtime/stdlib.c
@@ -54,6 +54,13 @@ void __cpu_write_imagef(void *image, int x, int y, int z, float4 *color);
void __cpu_write_imagei(void *image, int x, int y, int z, int4 *color);
void __cpu_write_imageui(void *image, int x, int y, int z, uint4 *color);
+void __cpu_read_imagefi(float4 *result, void *image, int x, int y, int z,
+ sampler_t sampler);
+void __cpu_read_imageii(int4 *result, void *image, int x, int y, int z,
+ sampler_t sampler);
+void __cpu_read_imageuii(uint4 *result, void *image, int x, int y, int z,
+ sampler_t sampler);
+
int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler)
{
coord.w = 0;
@@ -87,147 +94,20 @@ int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler)
float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord)
{
- int4 c;
- c.xy = coord;
- c.zw = 0;
+ float4 rs;
- return read_imagef((image3d_t)image, sampler, c);
+ __cpu_read_imagefi(&rs, image, coord.x, coord.y, 0, sampler);
+
+ return rs;
}
float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord)
{
- float4 result;
+ float4 rs;
- // Handle address mode
- coord = handle_address_mode(image, coord, sampler);
+ __cpu_read_imagefi(&rs, image, coord.x, coord.y, coord.z, sampler);
- if (coord.w != 0)
- {
- // Border color
- switch (get_image_channel_order(image))
- {
- case CLK_R:
- case CLK_RG:
- case CLK_RGB:
- case CLK_LUMINANCE:
- result.xyz = 0.0f;
- result.w = 1.0f;
- return result;
- default:
- result.xyzw = 0.0f;
- return result;
- }
- }
-
- int order, type;
- void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
-
-#define UNSWIZZLE(order, source, data, m)\
- switch (order) \
- { \
- case CLK_R: \
- case CLK_Rx: \
- data.x = (*source).x; \
- data.yz = 0; \
- data.w = m; \
- break; \
- case CLK_A: \
- data.w = (*source).x; \
- data.xyz = 0; \
- break; \
- case CLK_RG: \
- case CLK_RGx: \
- data.xy = (*source).xy; \
- data.z = 0; \
- data.w = m; \
- break; \
- case CLK_RA: \
- data.xw = (*source).xy; \
- data.yz = 0; \
- break; \
- case CLK_RGBA: \
- data = *source; \
- break; \
- case CLK_BGRA: \
- data.zyxw = (*source).xyzw; \
- break; \
- case CLK_ARGB: \
- data.wxyz = (*source).xyzw; \
- break; \
- case CLK_INTENSITY: \
- data.xyzw = (*source).x; \
- break; \
- case CLK_LUMINANCE: \
- data.xyz = (*source).x; \
- data.w = m; \
- break; \
- }
-
- switch (type)
- {
- case CLK_UNORM_INT8:
- {
- uchar4 *source = v_source;
- uchar4 data;
-
- UNSWIZZLE(order, source, data, 0xff)
-
- result.x = (float)data.x / 255.0f;
- result.y = (float)data.y / 255.0f;
- result.z = (float)data.z / 255.0f;
- result.w = (float)data.w / 255.0f;
- break;
- }
- case CLK_UNORM_INT16:
- {
- ushort4 *source = v_source;
- ushort4 data;
-
- UNSWIZZLE(order, source, data, 0xffff)
-
- result.x = (float)data.x / 65535.0f;
- result.y = (float)data.y / 65535.0f;
- result.z = (float)data.z / 65535.0f;
- result.w = (float)data.w / 65535.0f;
- break;
- }
- case CLK_SNORM_INT8:
- {
- char4 *source = v_source;
- char4 data;
-
- UNSWIZZLE(order, source, data, 0x7f)
-
- result.x = (float)data.x / 127.0f;
- result.y = (float)data.y / 127.0f;
- result.z = (float)data.z / 127.0f;
- result.w = (float)data.w / 127.0f;
- break;
- }
- case CLK_SNORM_INT16:
- {
- short4 *source = v_source;
- short4 data;
-
- UNSWIZZLE(order, source, data, 0x7fff)
-
- result.x = (float)data.x / 32767.0f;
- result.y = (float)data.y / 32767.0f;
- result.z = (float)data.z / 32767.0f;
- result.w = (float)data.w / 32767.0f;
- break;
- }
- case CLK_FLOAT:
- {
- float4 *source = v_source;
- UNSWIZZLE(order, source, result, 1.0f)
- break;
- }
- }
-
-#undef UNSWIZZLE
-
- return result;
+ return rs;
}
float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord)
@@ -485,138 +365,22 @@ float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord)
READ_IMAGE(float, f, 1.0f)
}
-#define UNSWIZZLE_8(source, data, m) \
- case CLK_ARGB: \
- data.wxyz = (*source).xyzw; \
- break; \
- case CLK_BGRA: \
- data.zyxw = (*source).xyzw; \
- break;
-
-#define UNSWIZZLE_16(source, data, m) \
- case CLK_INTENSITY: \
- data.xyzw = (*source).x; \
- break; \
- case CLK_LUMINANCE: \
- data.xyz = (*source).x; \
- data.w = m; \
- break;
-
-#define UNSWIZZLE_32(source, data, m) \
- case CLK_R: \
- case CLK_Rx: \
- data.x = (*source).x; \
- data.yz = 0; \
- data.w = m; \
- break; \
- case CLK_A: \
- data.w = (*source).x; \
- data.xyz = 0; \
- break; \
- case CLK_RG: \
- case CLK_RGx: \
- data.xy = (*source).xy; \
- data.z = 0; \
- data.w = m; \
- break; \
- case CLK_RA: \
- data.xw = (*source).xy; \
- data.yz = 0; \
- break; \
- case CLK_RGBA: \
- data = *source; \
- break;
-
int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord)
{
- int4 c;
- c.xy = coord;
- c.zw = 0;
+ int4 rs;
- return read_imagei((image3d_t)image, sampler, c);
+ __cpu_read_imageii(&rs, image, coord.x, coord.y, 0, sampler);
+
+ return rs;
}
int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord)
{
- int4 result;
+ int4 rs;
- // Handle address mode
- coord = handle_address_mode(image, coord, sampler);
+ __cpu_read_imageii(&rs, image, coord.x, coord.y, coord.z, sampler);
- if (coord.w != 0)
- {
- // Border color
- switch (get_image_channel_order(image))
- {
- case CLK_R:
- case CLK_RG:
- case CLK_RGB:
- case CLK_LUMINANCE:
- result.xyz = 0;
- result.w = 0x7fffffff;
- return result;
- default:
- result.xyzw = 0;
- return result;
- }
- }
-
- int order, type;
- void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
-
- switch (type)
- {
- case CLK_SIGNED_INT8:
- {
- char4 *source = v_source;
- char4 data;
-
- switch (order)
- {
- UNSWIZZLE_8(source, data, 0x7f)
- UNSWIZZLE_16(source, data, 0x7f)
- UNSWIZZLE_32(source, data, 0x7f)
- }
-
- result.x = data.x;
- result.y = data.y;
- result.z = data.z;
- result.w = data.w;
- break;
- }
- case CLK_SIGNED_INT16:
- {
- short4 *source = v_source;
- short4 data;
-
- switch (order)
- {
- UNSWIZZLE_8(source, data, 0x7fff)
- UNSWIZZLE_16(source, data, 0x7fff)
- UNSWIZZLE_32(source, data, 0x7fff)
- }
-
- result.x = data.x;
- result.y = data.y;
- result.z = data.z;
- result.w = data.w;
- break;
- }
- case CLK_SIGNED_INT32:
- {
- int4 *source = v_source;
-
- switch (order)
- {
- UNSWIZZLE_8(source, result, 0x7fffffff)
- UNSWIZZLE_16(source, result, 0x7fffffff)
- UNSWIZZLE_32(source, result, 0x7fffffff)
- }
- break;
- }
- }
-
- return result;
+ return rs;
}
int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord)
@@ -636,94 +400,20 @@ int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord)
uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord)
{
- int4 c;
- c.xy = coord;
- c.zw = 0;
+ uint4 rs;
- return read_imageui((image3d_t)image, sampler, c);
+ __cpu_read_imageuii(&rs, image, coord.x, coord.y, 0, sampler);
+
+ return rs;
}
uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord)
{
- uint4 result;
-
- // Handle address mode
- coord = handle_address_mode(image, coord, sampler);
-
- if (coord.w != 0)
- {
- // Border color
- switch (get_image_channel_order(image))
- {
- case CLK_R:
- case CLK_RG:
- case CLK_RGB:
- case CLK_LUMINANCE:
- result.xyz = 0;
- result.w = 0xffffffff;
- return result;
- default:
- result.xyzw = 0;
- return result;
- }
- }
+ uint4 rs;
- int order, type;
- void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type);
+ __cpu_read_imageuii(&rs, image, coord.x, coord.y, coord.z, sampler);
- switch (type)
- {
- case CLK_UNSIGNED_INT8:
- {
- uchar4 *source = v_source;
- uchar4 data;
-
- switch (order)
- {
- UNSWIZZLE_8(source, data, 0xff)
- UNSWIZZLE_16(source, data, 0xff)
- UNSWIZZLE_32(source, data, 0xff)
- }
-
- result.x = data.x;
- result.y = data.y;
- result.z = data.z;
- result.w = data.w;
- break;
- }
- case CLK_UNSIGNED_INT16:
- {
- ushort4 *source = v_source;
- ushort4 data;
-
- switch (order)
- {
- UNSWIZZLE_8(source, data, 0xffff)
- UNSWIZZLE_16(source, data, 0xffff)
- UNSWIZZLE_32(source, data, 0xffff)
- }
-
- result.x = data.x;
- result.y = data.y;
- result.z = data.z;
- result.w = data.w;
- break;
- }
- case CLK_UNSIGNED_INT32:
- {
- uint4 *source = v_source;
-
- switch (order)
- {
- UNSWIZZLE_8(source, result, 0xffffffff)
- UNSWIZZLE_16(source, result, 0xffffffff)
- UNSWIZZLE_32(source, result, 0xffffffff)
- }
- break;
- }
- }
-
- return result;
+ return rs;
}
uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord)
@@ -741,10 +431,6 @@ uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord)
READ_IMAGE(uint, ui, 0xffffffff)
}
-#undef UNSWIZZLE_8
-#undef UNSWIZZLE_16
-#undef UNSWIZZLE_32
-
void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color)
{
__cpu_write_imagef(image, coord.x, coord.y, 0, &color);