diff options
author | Zhigang Gong <zhigang.gong@linux.intel.com> | 2014-12-14 00:34:20 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2014-12-14 00:34:20 +0800 |
commit | 9bacda9c9f733d1b47c3dc3c36b83b2ed7df7ae8 (patch) | |
tree | a78e07c65a6028a88c6d229fac6c8b17bc5c2444 | |
parent | 1268bb1c7394d8a75482102a5ea9e6fc2a7421cc (diff) |
GBE/CL: use 2D image to implement large image1D_buffer. (image_refine)
Per the OpenCL spec, the minimum CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is 65536
pixels, which is too large for a 1D surface on Gen platforms.
We therefore have to use a 2D surface to implement it. This is doable
because the OpenCL spec only allows image1d_buffer_t to be accessed via
the default sampler, so it will never use float coordinates and never
use linear (non-nearest) filters.
Signed-off-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/libocl/src/ocl_image.cl | 20 | ||||
-rw-r--r-- | src/cl_gt_device.h | 2 | ||||
-rw-r--r-- | src/cl_mem.c | 53 | ||||
-rw-r--r-- | src/cl_mem.h | 6 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 2 |
5 files changed, 68 insertions, 15 deletions
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl index 8777d9fc..8bbd1e28 100644 --- a/backend/src/libocl/src/ocl_image.cl +++ b/backend/src/libocl/src/ocl_image.cl @@ -52,7 +52,7 @@ OVERLOADABLE int __gen_ocl_get_image_depth(image_type image); \ DECL_GEN_OCL_RW_IMAGE(image1d_t, 1) -DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1) +DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 2) DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2) DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4) DECL_GEN_OCL_RW_IMAGE(image2d_t, 2) @@ -370,9 +370,23 @@ DECL_IMAGE_TYPE(image3d_t, 3) DECL_IMAGE_TYPE(image2d_array_t, 4) DECL_IMAGE_TYPE(image2d_array_t, 3) +#define DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image_type, image_data_type, \ + suffix, coord_type) \ + OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \ + coord_type coord) \ + { \ + sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \ + | CLK_FILTER_NEAREST; \ + int2 effectCoord; \ + effectCoord.s0 = coord % 8192; \ + effectCoord.s1 = coord / 8192; \ + return __gen_ocl_read_image ##suffix( \ + cl_image, defaultSampler, effectCoord, 0); \ + } + #define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix) \ - DECL_READ_IMAGE_NOSAMPLER(image1d_buffer_t, image_data_type, \ - suffix, int) \ + DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image1d_buffer_t, image_data_type, \ + suffix, int) \ DECL_WRITE_IMAGE(image1d_buffer_t, image_data_type, suffix, int) DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, int4, i) diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index ed19f109..4faa15a1 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -50,7 +50,7 @@ .image3d_max_width = 8192, .image3d_max_height = 8192, .image3d_max_depth = 2048, -.image_mem_size = 8192, +.image_mem_size = 65536, .max_samplers = 16, .mem_base_addr_align = sizeof(cl_long) * 16 * 8, .min_data_type_align_size = sizeof(cl_long) * 16, diff --git a/src/cl_mem.c b/src/cl_mem.c index 3055bea5..1fb25ba2 100644 --- 
a/src/cl_mem.c +++ b/src/cl_mem.c @@ -190,10 +190,18 @@ cl_get_image_info(cl_mem mem, *(size_t *)param_value = image->slice_pitch; break; case CL_IMAGE_WIDTH: - *(size_t *)param_value = image->w; + + if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) { + struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image; + *(size_t *)param_value = buffer1d_image->size; + } else + *(size_t *)param_value = image->w; break; case CL_IMAGE_HEIGHT: - *(size_t *)param_value = IS_1D(image) ? 0 : image->h; + if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) + *(size_t *)param_value = 0; + else + *(size_t *)param_value = IS_1D(image) ? 0 : image->h; break; case CL_IMAGE_DEPTH: *(size_t *)param_value = IS_3D(image) ? image->depth : 0; @@ -243,6 +251,10 @@ cl_mem_allocate(enum cl_mem_type type, struct _cl_mem_gl_image *gl_image = NULL; TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image)); mem = &gl_image->base.base; + } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) { + struct _cl_mem_buffer1d_image *buffer1d_image = NULL; + TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image)); + mem = &buffer1d_image->base.base; } else { struct _cl_mem_buffer *buffer = NULL; TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer)); @@ -678,6 +690,7 @@ _cl_mem_new_image(cl_context ctx, cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; + size_t origin_width = w; // for image1d buffer work around. 
cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ @@ -710,8 +723,7 @@ _cl_mem_new_image(cl_context ctx, image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER))) DO_IMAGE_ERROR; - if (image_type == CL_MEM_OBJECT_IMAGE1D || - image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (image_type == CL_MEM_OBJECT_IMAGE1D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; @@ -724,19 +736,31 @@ _cl_mem_new_image(cl_context ctx, if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; tiling = CL_NO_TILE; - } else if (image_type == CL_MEM_OBJECT_IMAGE2D) { + } else if (image_type == CL_MEM_OBJECT_IMAGE2D || + image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + + if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR; + /* This is an image1d buffer which exceeds normal image size restrication + We have to use a 2D image to simulate this 1D image. */ + h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width; + w = w > ctx->device->image2d_max_width ? 
ctx->device->image2d_max_width : w; + printf("h = %d w = %d \n", h, w); + tiling = CL_NO_TILE; + } else if (cl_driver_get_ver(ctx->drv) != 6) { + /* Pick up tiling mode (we do only linear on SNB) */ + tiling = cl_get_default_tiling(ctx->drv); + } + size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; + if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; - /* Pick up tiling mode (we do only linear on SNB) */ - if (cl_driver_get_ver(ctx->drv) != 6) - tiling = cl_get_default_tiling(ctx->drv); - depth = 1; } else if (image_type == CL_MEM_OBJECT_IMAGE3D || image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || @@ -791,7 +815,16 @@ _cl_mem_new_image(cl_context ctx, sz = aligned_pitch * aligned_h * depth; } - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + else { + mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); + if (mem != NULL && err == CL_SUCCESS) { + struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem; + buffer1d_image->size = origin_width;; + } + } + if (mem == NULL || err != CL_SUCCESS) goto error; diff --git a/src/cl_mem.h b/src/cl_mem.h index 1641dcc4..fd502203 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -72,6 +72,7 @@ enum cl_mem_type { CL_MEM_SUBBUFFER_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, + CL_MEM_BUFFER1D_IMAGE_TYPE }; #define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE) #define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE) @@ -117,6 +118,11 @@ struct _cl_mem_gl_image { uint32_t texture; }; +struct _cl_mem_buffer1d_image { + struct _cl_mem_image base; + uint32_t size; +}; + 
inline static void cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h, cl_mem_object_type image_type, diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 0df78768..c80a11ba 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1003,11 +1003,11 @@ static int intel_get_surface_type(cl_mem_object_type type) { switch (type) { - case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: return I965_SURFACE_1D; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return I965_SURFACE_2D; |