summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorZhigang Gong <zhigang.gong@intel.com>2014-09-04 08:01:25 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-09-04 11:53:04 +0800
commit9ce75d4de41e8359f411ce5ab3dbd2cd363364b2 (patch)
tree96e63bf32d8826d6de3402373fccdb7674399ea8 /src
parent56d2cf2e02b6933478f080548f6960751ff18741 (diff)
GBE: fixup/refine a bug for image1D array's extra binding index handling.
Due to a hardware limitation on Gen7/Gen75 when sampling an integer image1Darray type surface with clamp address mode and nearest filter mode, we have to bind one buffer to two bti. The previous implementation hard coded the extra bti to 128 + the original index and, when checking in the driver layer whether a bti is of this extra type, assumed that the number of reserved bti is 3, which is wrong now. This patch fixes those hard-coded values and uses the macros defined in program.h instead. Signed-off-by: Zhigang Gong <zhigang.gong@intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cl_command_queue.c5
-rw-r--r--src/intel/intel_gpgpu.c28
2 files changed, 23 insertions, 10 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 52e91ae4..4cbb4eb8 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -17,6 +17,7 @@
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
+#include "program.h" // for BTI_MAX_IMAGE_NUM
#include "cl_command_queue.h"
#include "cl_context.h"
#include "cl_program.h"
@@ -142,8 +143,10 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
image->intel_fmt, image->image_type,
image->w, image->h, image->depth,
image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+ // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer
+ // on demand.
if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
+ cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_MAX_IMAGE_NUM, image->base.bo, image->offset,
image->intel_fmt, image->image_type,
image->w, image->h, image->depth,
image->row_pitch, (cl_gpgpu_tiling)image->tiling);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 867ab4cf..c4b91564 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -35,6 +35,7 @@
#include "intel/intel_structs.h"
#include "intel/intel_batchbuffer.h"
#include "intel/intel_driver.h"
+#include "program.h" // for BTI_RESERVED_NUM
#include "cl_alloc.h"
#include "cl_utils.h"
@@ -819,6 +820,22 @@ intel_get_surface_type(cl_mem_object_type type)
return 0;
}
+/* Get the fixed-up surface type for an image binding.
+ *
+ * Gen7/Gen75 (Ivybridge/Haswell) have a sampler issue on an integer type
+ * 1D array surface with clamp address mode and nearest filter mode. As a
+ * workaround the caller binds a 1D array image a second time at its
+ * original index + BTI_MAX_IMAGE_NUM; that extra binding has to be
+ * programmed as a 2D surface.
+ *
+ * gpgpu: GPGPU state; only gpgpu->drv->device_id is read here.
+ * index: binding table index the surface is being bound to.
+ * type:  OpenCL memory object type of the image.
+ *
+ * Returns I965_SURFACE_2D for a 1D array image bound at a large (extra)
+ * index on Ivybridge/Haswell, otherwise the result of
+ * intel_get_surface_type(type).
+ */
+static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_type type)
+{
+  /* Use the same BTI_MAX_IMAGE_NUM offset the caller adds when it binds the
+     extra surface instead of a hard coded 128, so both sides stay in sync.
+     NOTE(review): the BTI_RESERVED_NUM term presumably reflects that image
+     indices start after the reserved entries - confirm against program.h. */
+  if ((IS_IVYBRIDGE(gpgpu->drv->device_id) || IS_HASWELL(gpgpu->drv->device_id)) &&
+      index >= BTI_MAX_IMAGE_NUM + BTI_RESERVED_NUM &&
+      type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+    return I965_SURFACE_2D;
+  return intel_get_surface_type(type);
+}
+
static void
intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
uint32_t index,
@@ -836,12 +853,8 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
memset(ss, 0, sizeof(*ss));
-
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- ss->ss0.surface_type = I965_SURFACE_2D;
- else
- ss->ss0.surface_type = intel_get_surface_type(type);
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
@@ -886,10 +899,7 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
memset(ss, 0, sizeof(*ss));
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
- if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- ss->ss0.surface_type = I965_SURFACE_2D;
- else
- ss->ss0.surface_type = intel_get_surface_type(type);
+ ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
if (intel_is_surface_array(type)) {
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;