diff options
author | Denis Steckelmacher <steckdenis@yahoo.fr> | 2011-08-20 15:56:26 +0200 |
---|---|---|
committer | Denis Steckelmacher <steckdenis@yahoo.fr> | 2011-08-20 15:56:26 +0200 |
commit | beb531f333d3838e7a4690706a43293c4cee45ef (patch) | |
tree | 5cf0e031516c8223b3bfcf703d3f82a74e349867 | |
parent | 97917f6c1898a6eeb68c54ed640263bb435fc0ec (diff) |
Natively re-implement image reading functions taking integer coordinates.
-rw-r--r-- | src/core/cpu/builtins.cpp | 24 | ||||
-rw-r--r-- | src/core/cpu/kernel.h | 7 | ||||
-rw-r--r-- | src/core/cpu/sampler.cpp | 315 | ||||
-rw-r--r-- | src/core/memobject.cpp | 27 | ||||
-rw-r--r-- | src/core/memobject.h | 9 | ||||
-rw-r--r-- | src/runtime/stdlib.c | 370 |
6 files changed, 357 insertions, 395 deletions
diff --git a/src/core/cpu/builtins.cpp b/src/core/cpu/builtins.cpp index 081717c..265ba1f 100644 --- a/src/core/cpu/builtins.cpp +++ b/src/core/cpu/builtins.cpp @@ -353,6 +353,24 @@ static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color) g_work_group->writeImage(image, x, y, z, color); } +static void read_imagefi(float *result, Image2D *image, int x, int y, int z, + int32_t sampler) +{ + g_work_group->readImage(result, image, x, y, z, sampler); +} + +static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z, + int32_t sampler) +{ + g_work_group->readImage(result, image, x, y, z, sampler); +} + +static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z, + int32_t sampler) +{ + g_work_group->readImage(result, image, x, y, z, sampler); +} + /* * Bridge between LLVM and us */ @@ -401,6 +419,12 @@ void *getBuiltin(const std::string &name) return (void *)&write_imagei; else if (name == "__cpu_write_imageui") return (void *)&write_imageui; + else if (name == "__cpu_read_imagefi") + return (void *)&read_imagefi; + else if (name == "__cpu_read_imageii") + return (void *)&read_imageii; + else if (name == "__cpu_read_imageuii") + return (void *)&read_imageuii; else if (name == "debug") return (void *)&printf; diff --git a/src/core/cpu/kernel.h b/src/core/cpu/kernel.h index dec42e6..cb3a296 100644 --- a/src/core/cpu/kernel.h +++ b/src/core/cpu/kernel.h @@ -109,6 +109,13 @@ class CPUKernelWorkGroup void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const; void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const; + void readImage(float *result, Image2D *image, int x, int y, int z, + int32_t sampler) const; + void readImage(int32_t *result, Image2D *image, int x, int y, int z, + int32_t sampler) const; + void readImage(uint32_t *result, Image2D *image, int x, int y, int z, + int32_t sampler) const; + void builtinNotFound(const std::string &name) const; private: diff --git a/src/core/cpu/sampler.cpp b/src/core/cpu/sampler.cpp index c173004..58b3bd3 100644 --- a/src/core/cpu/sampler.cpp +++ b/src/core/cpu/sampler.cpp @@ -35,6 +35,7 @@ */ #include "../memobject.h" +#include "../sampler.h" #include "kernel.h" #include "buffer.h" #include "builtins.h" @@ -45,6 +46,39 @@ using namespace Coal; /* + * Helper functions + */ + +static int clamp(int a, int b, int c) +{ + return (a < b) ? b : ((a > c) ? c : a); +} + +static bool handle_address_mode(Image2D *image, int &x, int &y, int &z, + uint32_t sampler) +{ + bool is_3d = (image->type() == MemObject::Image3D); + int w = image->width(), + h = image->height(), + d = (is_3d ? ((Image3D *)image)->depth() : 1); + + if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE) + { + x = clamp(x, 0, w - 1); + y = clamp(y, 0, h - 1); + if (is_3d) z = clamp(z, 0, d - 1); + } + else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP) + { + x = clamp(x, 0, w); + y = clamp(y, 0, h); + if (is_3d) z = clamp(z, 0, d); + } + + return (x == w || y == h || z == d); +} + +/* * Macros or functions used to accelerate the functions */ #ifndef __has_builtin @@ -60,70 +94,139 @@ static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b, rs[3] = (w < 4 ? a[w] : b[w - 4]); } -static void slow_convert_to_format4f(float *data, cl_channel_type type) +static void slow_convert_to_format4f(void *dest, float *data, + cl_channel_type type, unsigned int channels) { // Convert always the four components of source to target if (type == CL_FLOAT) - return; + std::memcpy(dest, data, channels * sizeof(float)); - // NOTE: We can read and write at the same time in data because - // we always begin wy reading 4 bytes (float) and never write - // more than 4 bytes, so no data is corrupted - for (unsigned int i=0; i<3; ++i) + for (unsigned int i=0; i<channels; ++i) + { + switch (type) + { + case CL_SNORM_INT8: + ((int8_t *)dest)[i] = data[i] * 128.0f; + break; + case CL_SNORM_INT16: + ((int16_t *)dest)[i] = data[i] * 32767.0f; + break; + case CL_UNORM_INT8: + ((uint8_t *)dest)[i] = data[i] * 255.0f; + break; + case CL_UNORM_INT16: + ((uint16_t *)dest)[i] = data[i] * 65535.0f; + break; + } + } +} + +static void slow_convert_from_format4f(float *data, void *source, + cl_channel_type type, unsigned int channels) +{ + // Convert always the four components of source to target + if (type == CL_FLOAT) + std::memcpy(data, source, channels * sizeof(float)); + + for (unsigned int i=0; i<channels; ++i) { switch (type) { case CL_SNORM_INT8: - ((int8_t *)data)[i] = data[i] * 128.0f; + data[i] = (float)((int8_t *)source)[i] / 127.0f; break; case CL_SNORM_INT16: - ((int16_t *)data)[i] = data[i] * 32767.0f; + data[i] = (float)((int16_t *)source)[i] / 32767.0f; break; case CL_UNORM_INT8: - ((uint8_t *)data)[i] = data[i] * 256.0f; + data[i] = (float)((uint8_t *)source)[i] / 127.0f; break; case CL_UNORM_INT16: - ((uint16_t *)data)[i] = data[i] * 65535.0f; + data[i] = (float)((uint16_t *)source)[i] / 127.0f; break; } } } -static void slow_convert_to_format4i(int *data, cl_channel_type type) +static void slow_convert_to_format4i(void *dest, int *data, + cl_channel_type type, unsigned int channels) { // Convert always the four components of source to target if (type == CL_SIGNED_INT32) - return; + std::memcpy(dest, data, channels * sizeof(int32_t)); - for (unsigned int i=0; i<3; ++i) + for (unsigned int i=0; i<channels; ++i) + { + switch (type) + { + case CL_SIGNED_INT8: + ((int8_t *)dest)[i] = data[i]; + break; + case CL_SIGNED_INT16: + ((int16_t *)dest)[i] = data[i]; + break; + } + } +} + +static void slow_convert_from_format4i(int32_t *data, void *source, + cl_channel_type type, unsigned int channels) +{ + // Convert always the four components of source to target + if (type == CL_SIGNED_INT32) + std::memcpy(data, source, channels * sizeof(int32_t)); + + for (unsigned int i=0; i<channels; ++i) { switch (type) { case CL_SIGNED_INT8: - ((int8_t *)data)[i] = data[i]; + data[i] = ((int8_t *)source)[i]; break; case CL_SIGNED_INT16: - ((int16_t *)data)[i] = data[i]; + data[i] = ((int16_t *)source)[i]; break; } } } -static void slow_convert_to_format4ui(uint32_t *data, cl_channel_type type) +static void slow_convert_to_format4ui(void *dest, uint32_t *data, + cl_channel_type type, unsigned int channels) { // Convert always the four components of source to target if (type == CL_UNSIGNED_INT32) - return; + std::memcpy(dest, data, channels * sizeof(uint32_t)); for (unsigned int i=0; i<3; ++i) { switch (type) { case CL_UNSIGNED_INT8: - ((uint8_t *)data)[i] = data[i]; + ((uint8_t *)dest)[i] = data[i]; break; case CL_UNSIGNED_INT16: - ((uint16_t *)data)[i] = data[i]; + ((uint16_t *)dest)[i] = data[i]; + break; + } + } +} + +static void slow_convert_from_format4ui(uint32_t *data, void *source, + cl_channel_type type, unsigned int channels) +{ + // Convert always the four components of source to target + if (type == CL_UNSIGNED_INT32) + std::memcpy(data, source, channels * sizeof(uint32_t)); + + for (unsigned int i=0; i<channels; ++i) + { + switch (type) + { + case CL_UNSIGNED_INT8: + data[i] = ((uint8_t *)source)[i]; + break; + case CL_UNSIGNED_INT16: + data[i] = ((uint16_t *)source)[i]; break; } } @@ -138,14 +241,23 @@ static void slow_convert_to_format4ui(uint32_t *data, cl_channel_type type) slow_shuffle4(rs, a, b, x, y, z, w) #endif - #define convert_to_format4f(data, type) \ - slow_convert_to_format4f(data, type) + #define convert_to_format4f(dest, data, type, channels) \ + slow_convert_to_format4f(dest, data, type, channels) + + #define convert_to_format4i(dest, data, type, channels) \ + slow_convert_to_format4i(dest, data, type, channels) + + #define convert_to_format4ui(data, source, type, channels) \ + slow_convert_to_format4ui(data, source, type, channels) + + #define convert_from_format4f(data, source, type, channels) \ + slow_convert_from_format4f(data, source, type, channels) - #define convert_to_format4i(data, type) \ - slow_convert_to_format4i(data, type) + #define convert_from_format4i(data, source, type, channels) \ + slow_convert_from_format4i(data, source, type, channels) - #define convert_to_format4ui(data, type) \ - slow_convert_to_format4ui(data, type) + #define convert_from_format4ui(data, source, type, channels) \ + slow_convert_from_format4ui(data, source, type, channels) static void swizzle(uint32_t *target, uint32_t *source, cl_channel_order order, bool reading, uint32_t t_max) @@ -250,14 +362,14 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, swizzle((uint32_t *)converted, (uint32_t *)color, image->format().image_channel_order, false, 0); - // Convert color to the correct format - convert_to_format4f(converted, image->format().image_channel_data_type); - // Get a pointer in the image where to write the data void *target = getImageData(image, x, y, z); - // Copy the converted data to the image - std::memcpy(target, converted, image->pixel_size()); + // Convert color to the correct format + convert_to_format4f(target, + converted, + image->format().image_channel_data_type, + image->channels()); } void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, @@ -270,14 +382,14 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, swizzle((uint32_t *)converted, (uint32_t *)color, image->format().image_channel_order, false, 0); - // Convert color to the correct format - convert_to_format4i(converted, image->format().image_channel_data_type); - // Get a pointer in the image where to write the data void *target = getImageData(image, x, y, z); - // Copy the converted data to the image - std::memcpy(target, converted, image->pixel_size()); + // Convert color to the correct format + convert_to_format4i(target, + converted, + image->format().image_channel_data_type, + image->channels()); } void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, @@ -287,15 +399,134 @@ void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, // Swizzle to the correct order (float, int and uint are 32-bit, so the // type has no importance - swizzle((uint32_t *)converted, (uint32_t *)color, - image->format().image_channel_order, false, 0); - - // Convert color to the correct format - convert_to_format4ui(converted, image->format().image_channel_data_type); + swizzle(converted, color, image->format().image_channel_order, false, 0); // Get a pointer in the image where to write the data void *target = getImageData(image, x, y, z); - // Copy the converted data to the image - std::memcpy(target, converted, image->pixel_size()); + // Convert color to the correct format + convert_to_format4ui(target, + converted, + image->format().image_channel_data_type, + image->channels()); +} + +void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y, + int z, int32_t sampler) const +{ + // Handle the addressing mode of the sampler + if (handle_address_mode(image, x, y, z, sampler)) + { + // Border color + result[0] = 0.0f; + result[1] = 0.0f; + result[2] = 0.0f; + + switch (image->format().image_channel_order) + { + case CL_R: + case CL_RG: + case CL_RGB: + case CL_LUMINANCE: + result[3] = 1.0f; + break; + default: + result[3] = 0.0f; + } + + return; + } + + // Load the data from the image, converting it + void *source = getImageData(image, x, y, z); + float converted[4]; + + convert_from_format4f(converted, + source, + image->format().image_channel_data_type, + image->channels()); + + // Swizzle the pixel just read and place it in result + swizzle((uint32_t *)result, (uint32_t *)converted, + image->format().image_channel_order, true, 1065353216 /* 1.0f */); +} + +void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y, + int z, int32_t sampler) const +{ + // Handle the addressing mode of the sampler + if (handle_address_mode(image, x, y, z, sampler)) + { + // Border color + result[0] = 0; + result[1] = 0; + result[2] = 0; + + switch (image->format().image_channel_order) + { + case CL_R: + case CL_RG: + case CL_RGB: + case CL_LUMINANCE: + result[3] = 0x7fffffff; + break; + default: + result[3] = 0; + } + + return; + } + + // Load the data from the image, converting it + void *source = getImageData(image, x, y, z); + int32_t converted[4]; + + convert_from_format4i(converted, + source, + image->format().image_channel_data_type, + image->channels()); + + // Swizzle the pixel just read and place it in result + swizzle((uint32_t *)result, (uint32_t *)converted, + image->format().image_channel_order, true, 0x7fffffff); +} + +void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y, + int z, int32_t sampler) const +{ + // Handle the addressing mode of the sampler + if (handle_address_mode(image, x, y, z, sampler)) + { + // Border color + result[0] = 0; + result[1] = 0; + result[2] = 0; + + switch (image->format().image_channel_order) + { + case CL_R: + case CL_RG: + case CL_RGB: + case CL_LUMINANCE: + result[3] = 0xffffffff; + break; + default: + result[3] = 0; + } + + return; + } + + // Load the data from the image, converting it + void *source = getImageData(image, x, y, z); + uint32_t converted[4]; + + convert_from_format4ui(converted, + source, + image->format().image_channel_data_type, + image->channels()); + + // Swizzle the pixel just read and place it in result + swizzle(result, converted, image->format().image_channel_order, true, + 0x7fffffff); }
\ No newline at end of file diff --git a/src/core/memobject.cpp b/src/core/memobject.cpp index 47e1141..613d403 100644 --- a/src/core/memobject.cpp +++ b/src/core/memobject.cpp @@ -713,10 +713,8 @@ size_t Image2D::element_size(const cl_image_format &format) } } -size_t Image2D::pixel_size(const cl_image_format &format) +unsigned int Image2D::channels(const cl_image_format &format) { - size_t multiplier; - switch (format.image_channel_order) { case CL_R: @@ -724,30 +722,33 @@ size_t Image2D::pixel_size(const cl_image_format &format) case CL_A: case CL_INTENSITY: case CL_LUMINANCE: - multiplier = 1; + return 1; break; case CL_RG: case CL_RGx: case CL_RA: - multiplier = 2; + return 2; break; case CL_RGBA: case CL_ARGB: case CL_BGRA: - multiplier = 4; + return 4; break; case CL_RGBx: case CL_RGB: - multiplier = 0; // Only special data types allowed (565, 555, etc) + return 1; // Only special data types allowed (565, 555, etc) break; default: return 0; } +} +size_t Image2D::pixel_size(const cl_image_format &format) +{ switch (format.image_channel_data_type) { case CL_UNORM_SHORT_565: @@ -756,15 +757,25 @@ size_t Image2D::pixel_size(const cl_image_format &format) case CL_UNORM_INT_101010: return 4; default: - return multiplier * element_size(format); + return channels(format) * element_size(format); } } +size_t Image2D::element_size() const +{ + return element_size(p_format); +} + size_t Image2D::pixel_size() const { return pixel_size(p_format); } +unsigned int Image2D::channels() const +{ + return channels(p_format); +} + /* * Image3D */ diff --git a/src/core/memobject.h b/src/core/memobject.h index 2a81a03..09f07e9 100644 --- a/src/core/memobject.h +++ b/src/core/memobject.h @@ -230,9 +230,12 @@ class Image2D : public MemObject void *param_value, size_t *param_value_size_ret) const; - static size_t element_size(const cl_image_format &format); /*!< \brief Size in bytes of each channel of \p format */ - static size_t pixel_size(const cl_image_format &format); /*!< \brief Size in bytes of a pixel in \p format */ - size_t pixel_size() const; /*!< \brief Pixel size of this image */ + static size_t element_size(const cl_image_format &format); /*!< \brief Size in bytes of each channel of \p format */ + static unsigned int channels(const cl_image_format &format);/*!< \brief Number of channels of \p format */ + static size_t pixel_size(const cl_image_format &format); /*!< \brief Size in bytes of a pixel in \p format */ + size_t pixel_size() const; /*!< \brief Pixel size of this image */ + size_t element_size() const; /*!< \brief Channel size of this image */ + unsigned int channels() const; /*!< \brief Number of channels of this image */ private: size_t p_width, p_height, p_row_pitch; diff --git a/src/runtime/stdlib.c b/src/runtime/stdlib.c index cbb3ec8..4b6a0c2 100644 --- a/src/runtime/stdlib.c +++ b/src/runtime/stdlib.c @@ -54,6 +54,13 @@ void __cpu_write_imagef(void *image, int x, int y, int z, float4 *color); void __cpu_write_imagei(void *image, int x, int y, int z, int4 *color); void __cpu_write_imageui(void *image, int x, int y, int z, uint4 *color); +void __cpu_read_imagefi(float4 *result, void *image, int x, int y, int z, + sampler_t sampler); +void __cpu_read_imageii(int4 *result, void *image, int x, int y, int z, + sampler_t sampler); +void __cpu_read_imageuii(uint4 *result, void *image, int x, int y, int z, + sampler_t sampler); + int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler) { coord.w = 0; @@ -87,147 +94,20 @@ int4 handle_address_mode(image3d_t image, int4 coord, sampler_t sampler) float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, int2 coord) { - int4 c; - c.xy = coord; - c.zw = 0; + float4 rs; - return read_imagef((image3d_t)image, sampler, c); + __cpu_read_imagefi(&rs, image, coord.x, coord.y, 0, sampler); + + return rs; } float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, int4 coord) { - float4 result; + float4 rs; - // Handle address mode - coord = handle_address_mode(image, coord, sampler); + __cpu_read_imagefi(&rs, image, coord.x, coord.y, coord.z, sampler); - if (coord.w != 0) - { - // Border color - switch (get_image_channel_order(image)) - { - case CLK_R: - case CLK_RG: - case CLK_RGB: - case CLK_LUMINANCE: - result.xyz = 0.0f; - result.w = 1.0f; - return result; - default: - result.xyzw = 0.0f; - return result; - } - } - - int order, type; - void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type); - -#define UNSWIZZLE(order, source, data, m)\ - switch (order) \ - { \ - case CLK_R: \ - case CLK_Rx: \ - data.x = (*source).x; \ - data.yz = 0; \ - data.w = m; \ - break; \ - case CLK_A: \ - data.w = (*source).x; \ - data.xyz = 0; \ - break; \ - case CLK_RG: \ - case CLK_RGx: \ - data.xy = (*source).xy; \ - data.z = 0; \ - data.w = m; \ - break; \ - case CLK_RA: \ - data.xw = (*source).xy; \ - data.yz = 0; \ - break; \ - case CLK_RGBA: \ - data = *source; \ - break; \ - case CLK_BGRA: \ - data.zyxw = (*source).xyzw; \ - break; \ - case CLK_ARGB: \ - data.wxyz = (*source).xyzw; \ - break; \ - case CLK_INTENSITY: \ - data.xyzw = (*source).x; \ - break; \ - case CLK_LUMINANCE: \ - data.xyz = (*source).x; \ - data.w = m; \ - break; \ - } - - switch (type) - { - case CLK_UNORM_INT8: - { - uchar4 *source = v_source; - uchar4 data; - - UNSWIZZLE(order, source, data, 0xff) - - result.x = (float)data.x / 255.0f; - result.y = (float)data.y / 255.0f; - result.z = (float)data.z / 255.0f; - result.w = (float)data.w / 255.0f; - break; - } - case CLK_UNORM_INT16: - { - ushort4 *source = v_source; - ushort4 data; - - UNSWIZZLE(order, source, data, 0xffff) - - result.x = (float)data.x / 65535.0f; - result.y = (float)data.y / 65535.0f; - result.z = (float)data.z / 65535.0f; - result.w = (float)data.w / 65535.0f; - break; - } - case CLK_SNORM_INT8: - { - char4 *source = v_source; - char4 data; - - UNSWIZZLE(order, source, data, 0x7f) - - result.x = (float)data.x / 127.0f; - result.y = (float)data.y / 127.0f; - result.z = (float)data.z / 127.0f; - result.w = (float)data.w / 127.0f; - break; - } - case CLK_SNORM_INT16: - { - short4 *source = v_source; - short4 data; - - UNSWIZZLE(order, source, data, 0x7fff) - - result.x = (float)data.x / 32767.0f; - result.y = (float)data.y / 32767.0f; - result.z = (float)data.z / 32767.0f; - result.w = (float)data.w / 32767.0f; - break; - } - case CLK_FLOAT: - { - float4 *source = v_source; - UNSWIZZLE(order, source, result, 1.0f) - break; - } - } - -#undef UNSWIZZLE - - return result; + return rs; } float4 OVERLOAD read_imagef(image2d_t image, sampler_t sampler, float2 coord) @@ -485,138 +365,22 @@ float4 OVERLOAD read_imagef(image3d_t image, sampler_t sampler, float4 coord) READ_IMAGE(float, f, 1.0f) } -#define UNSWIZZLE_8(source, data, m) \ - case CLK_ARGB: \ - data.wxyz = (*source).xyzw; \ - break; \ - case CLK_BGRA: \ - data.zyxw = (*source).xyzw; \ - break; - -#define UNSWIZZLE_16(source, data, m) \ - case CLK_INTENSITY: \ - data.xyzw = (*source).x; \ - break; \ - case CLK_LUMINANCE: \ - data.xyz = (*source).x; \ - data.w = m; \ - break; - -#define UNSWIZZLE_32(source, data, m) \ - case CLK_R: \ - case CLK_Rx: \ - data.x = (*source).x; \ - data.yz = 0; \ - data.w = m; \ - break; \ - case CLK_A: \ - data.w = (*source).x; \ - data.xyz = 0; \ - break; \ - case CLK_RG: \ - case CLK_RGx: \ - data.xy = (*source).xy; \ - data.z = 0; \ - data.w = m; \ - break; \ - case CLK_RA: \ - data.xw = (*source).xy; \ - data.yz = 0; \ - break; \ - case CLK_RGBA: \ - data = *source; \ - break; - int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, int2 coord) { - int4 c; - c.xy = coord; - c.zw = 0; + int4 rs; - return read_imagei((image3d_t)image, sampler, c); + __cpu_read_imageii(&rs, image, coord.x, coord.y, 0, sampler); + + return rs; } int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, int4 coord) { - int4 result; + int4 rs; - // Handle address mode - coord = handle_address_mode(image, coord, sampler); + __cpu_read_imageii(&rs, image, coord.x, coord.y, coord.z, sampler); - if (coord.w != 0) - { - // Border color - switch (get_image_channel_order(image)) - { - case CLK_R: - case CLK_RG: - case CLK_RGB: - case CLK_LUMINANCE: - result.xyz = 0; - result.w = 0x7fffffff; - return result; - default: - result.xyzw = 0; - return result; - } - } - - int order, type; - void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type); - - switch (type) - { - case CLK_SIGNED_INT8: - { - char4 *source = v_source; - char4 data; - - switch (order) - { - UNSWIZZLE_8(source, data, 0x7f) - UNSWIZZLE_16(source, data, 0x7f) - UNSWIZZLE_32(source, data, 0x7f) - } - - result.x = data.x; - result.y = data.y; - result.z = data.z; - result.w = data.w; - break; - } - case CLK_SIGNED_INT16: - { - short4 *source = v_source; - short4 data; - - switch (order) - { - UNSWIZZLE_8(source, data, 0x7fff) - UNSWIZZLE_16(source, data, 0x7fff) - UNSWIZZLE_32(source, data, 0x7fff) - } - - result.x = data.x; - result.y = data.y; - result.z = data.z; - result.w = data.w; - break; - } - case CLK_SIGNED_INT32: - { - int4 *source = v_source; - - switch (order) - { - UNSWIZZLE_8(source, result, 0x7fffffff) - UNSWIZZLE_16(source, result, 0x7fffffff) - UNSWIZZLE_32(source, result, 0x7fffffff) - } - break; - } - } - - return result; + return rs; } int4 OVERLOAD read_imagei(image2d_t image, sampler_t sampler, float2 coord) @@ -636,94 +400,20 @@ int4 OVERLOAD read_imagei(image3d_t image, sampler_t sampler, float4 coord) uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, int2 coord) { - int4 c; - c.xy = coord; - c.zw = 0; + uint4 rs; - return read_imageui((image3d_t)image, sampler, c); + __cpu_read_imageuii(&rs, image, coord.x, coord.y, 0, sampler); + + return rs; } uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, int4 coord) { - uint4 result; - - // Handle address mode - coord = handle_address_mode(image, coord, sampler); - - if (coord.w != 0) - { - // Border color - switch (get_image_channel_order(image)) - { - case CLK_R: - case CLK_RG: - case CLK_RGB: - case CLK_LUMINANCE: - result.xyz = 0; - result.w = 0xffffffff; - return result; - default: - result.xyzw = 0; - return result; - } - } + uint4 rs; - int order, type; - void *v_source = __cpu_image_data(image, coord.x, coord.y, coord.z, &order, &type); + __cpu_read_imageuii(&rs, image, coord.x, coord.y, coord.z, sampler); - switch (type) - { - case CLK_UNSIGNED_INT8: - { - uchar4 *source = v_source; - uchar4 data; - - switch (order) - { - UNSWIZZLE_8(source, data, 0xff) - UNSWIZZLE_16(source, data, 0xff) - UNSWIZZLE_32(source, data, 0xff) - } - - result.x = data.x; - result.y = data.y; - result.z = data.z; - result.w = data.w; - break; - } - case CLK_UNSIGNED_INT16: - { - ushort4 *source = v_source; - ushort4 data; - - switch (order) - { - UNSWIZZLE_8(source, data, 0xffff) - UNSWIZZLE_16(source, data, 0xffff) - UNSWIZZLE_32(source, data, 0xffff) - } - - result.x = data.x; - result.y = data.y; - result.z = data.z; - result.w = data.w; - break; - } - case CLK_UNSIGNED_INT32: - { - uint4 *source = v_source; - - switch (order) - { - UNSWIZZLE_8(source, result, 0xffffffff) - UNSWIZZLE_16(source, result, 0xffffffff) - UNSWIZZLE_32(source, result, 0xffffffff) - } - break; - } - } - - return result; + return rs; } uint4 OVERLOAD read_imageui(image2d_t image, sampler_t sampler, float2 coord) @@ -741,10 +431,6 @@ uint4 OVERLOAD read_imageui(image3d_t image, sampler_t sampler, float4 coord) READ_IMAGE(uint, ui, 0xffffffff) } -#undef UNSWIZZLE_8 -#undef UNSWIZZLE_16 -#undef UNSWIZZLE_32 - void OVERLOAD write_imagef(image2d_t image, int2 coord, float4 color) { __cpu_write_imagef(image, coord.x, coord.y, 0, &color); |