summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYang Rong <rong.r.yang@intel.com>2015-11-10 15:52:13 +0800
committerYang Rong <rong.r.yang@intel.com>2016-11-08 20:35:08 +0800
commitb303506b1e3f17a3ab660ef6b66ab24c301270ef (patch)
tree6b82b8678c6208c908cd1ada60bd633e366db34e
parent0e89202b50561e716deb15b4c1142fb49c525458 (diff)
OCL20: Add svm support.
Enable CL_DEVICE_SVM_COARSE_GRAIN_BUFFER SVM support, using userptr and softpin to implement it. Userptr shares the pages between the CPU and the GPU, and softpin unifies the CPU and GPU addresses. It currently works on i386 systems; x86_64 depends on backend support. This patch is based on the softpin patches for the DRM library and the DRM kernel driver: http://lists.freedesktop.org/archives/intel-gfx/2015-September/075446.html. Signed-off-by: Yang Rong <rong.r.yang@intel.com> Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
-rw-r--r--CMakeLists.txt5
-rw-r--r--src/CMakeLists.txt5
-rw-r--r--src/cl_api.c87
-rw-r--r--src/cl_context.c29
-rw-r--r--src/cl_context.h4
-rw-r--r--src/cl_device_id.c1
-rw-r--r--src/cl_device_id.h3
-rw-r--r--src/cl_driver.h6
-rw-r--r--src/cl_driver_defs.c2
-rw-r--r--src/cl_enqueue.c8
-rw-r--r--src/cl_gt_device.h1
-rw-r--r--src/cl_kernel.c36
-rw-r--r--src/cl_kernel.h8
-rw-r--r--src/cl_mem.c111
-rw-r--r--src/cl_mem.h13
-rw-r--r--src/intel/intel_driver.c4
16 files changed, 312 insertions, 11 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d839f3f1..f36ac717 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,18 +153,19 @@ IF(DRM_INTEL_FOUND)
ELSE(HAVE_DRM_INTEL_SUBSLICE_TOTAL)
MESSAGE(STATUS "Disable subslice total query support")
ENDIF(HAVE_DRM_INTEL_SUBSLICE_TOTAL)
- CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_pooled_eu" "" HAVE_DRM_INTEL_POOLED_EU)
+ CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_pooled_eu" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_POOLED_EU)
IF(HAVE_DRM_INTEL_POOLED_EU)
MESSAGE(STATUS "Enable pooled eu query support")
ELSE(HAVE_DRM_INTEL_POOLED_EU)
MESSAGE(STATUS "Disable pooled eu query support")
ENDIF(HAVE_DRM_INTEL_POOLED_EU)
- CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_min_eu_in_pool" "" HAVE_DRM_INTEL_MIN_EU_IN_POOL)
+ CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_get_min_eu_in_pool" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_MIN_EU_IN_POOL)
IF(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
MESSAGE(STATUS "Enable min eu in pool query support")
ELSE(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
MESSAGE(STATUS "Disable min eu in pool query support")
ENDIF(HAVE_DRM_INTEL_MIN_EU_IN_POOL)
+ CHECK_LIBRARY_EXISTS(drm_intel "drm_intel_bo_set_softpin_offset" ${DRM_INTEL_LIBDIR} HAVE_DRM_INTEL_BO_SET_SOFTPIN)
ELSE(DRM_INTEL_FOUND)
MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
ENDIF(DRM_INTEL_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 72392b56..26ccceaf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -151,6 +151,11 @@ if (HAVE_DRM_INTEL_MIN_EU_IN_POOL)
SET(CMAKE_C_FLAGS "-DHAS_MIN_EU_IN_POOL ${CMAKE_C_FLAGS}")
endif (HAVE_DRM_INTEL_MIN_EU_IN_POOL)
+if (HAVE_DRM_INTEL_BO_SET_SOFTPIN) # result of the top-level drm_intel_bo_set_softpin_offset library check
+ SET(CMAKE_CXX_FLAGS "-DHAS_BO_SET_SOFTPIN ${CMAKE_CXX_FLAGS}") # define HAS_BO_SET_SOFTPIN for C++ sources
+ SET(CMAKE_C_FLAGS "-DHAS_BO_SET_SOFTPIN ${CMAKE_C_FLAGS}") # define HAS_BO_SET_SOFTPIN for C sources
+endif (HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+
set(GIT_SHA1 "git_sha1.h")
add_custom_target(${GIT_SHA1} ALL
COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
diff --git a/src/cl_api.c b/src/cl_api.c
index 1d4c5a1f..e24831c4 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -487,6 +487,80 @@ error:
return mem;
}
+void * /* result of cl_mem_svm_allocate(), or NULL when the error path is taken */
+clSVMAlloc (cl_context context,
+ cl_svm_mem_flags flags,
+ size_t size,
+ unsigned int alignment)
+{ /* SVM allocation entry point: checks the context, then forwards every argument unchanged to cl_mem_svm_allocate(). */
+ cl_int err = CL_SUCCESS; /* never read in this function; presumably written by CHECK_CONTEXT -- TODO confirm */
+ CHECK_CONTEXT (context); /* NOTE(review): presumably jumps to `error` for an invalid context -- confirm against the macro */
+ return cl_mem_svm_allocate(context, flags, size, alignment);
+error:
+ return NULL; /* failure is only visible to the caller as a NULL pointer */
+}
+
+/* SVM release entry point.
+ *
+ * context:     context the allocation was made in; checked by CHECK_CONTEXT.
+ * svm_pointer: pointer to release; it is handed unchanged to
+ *              cl_mem_svm_delete().
+ *
+ * Returns nothing. The helper is called as a plain statement because a
+ * `return` carrying an expression is not allowed in a function returning void.
+ */
+void
+clSVMFree (cl_context context, void* svm_pointer)
+{
+  /* NOTE(review): err is presumably required by CHECK_CONTEXT -- confirm. */
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+  cl_mem_svm_delete(context, svm_pointer);
+error:
+  return;
+}
+
+cl_int /* CL_INVALID_VALUE when svm_ptr is unknown; otherwise the status left in err by the map call */
+clEnqueueSVMMap (cl_command_queue command_queue,
+ cl_bool blocking_map,
+ cl_map_flags map_flags,
+ void *svm_ptr,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event)
+{ /* Maps an SVM region by finding the cl_mem registered for svm_ptr and enqueueing an ordinary buffer map on it. */
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer;
+
+ CHECK_QUEUE(command_queue); /* NOTE(review): presumably jumps to `error` for an invalid queue -- confirm against the macro */
+ buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr); /* the lookup compares svm_ptr with each object's host_ptr for equality */
+ if(buffer == NULL) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ clEnqueueMapBuffer(command_queue, buffer, blocking_map, map_flags, 0, size, /* offset 0: the mapping starts at the allocation base */
+ num_events_in_wait_list, event_wait_list, event, &err); /* the returned pointer is discarded; the status comes back through err */
+error:
+ return err;
+}
+
+cl_int /* CL_INVALID_VALUE when svm_ptr is unknown; otherwise the result of clEnqueueUnmapMemObject() */
+clEnqueueSVMUnmap (cl_command_queue command_queue,
+ void *svm_ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event)
+{ /* Unmaps an SVM region by finding the cl_mem registered for svm_ptr and enqueueing an ordinary unmap on it. */
+ cl_int err = CL_SUCCESS;
+ cl_mem buffer;
+
+ CHECK_QUEUE(command_queue); /* NOTE(review): presumably jumps to `error` for an invalid queue -- confirm against the macro */
+ buffer = cl_context_get_svm_from_ptr(command_queue->ctx, svm_ptr); /* the lookup compares svm_ptr with each object's host_ptr for equality */
+ if(buffer == NULL) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ err = clEnqueueUnmapMemObject(command_queue, buffer, svm_ptr, /* svm_ptr doubles as the mapped pointer being released */
+ num_events_in_wait_list, event_wait_list, event);
+
+error:
+ return err;
+}
+
+
cl_mem
clCreateImage2D(cl_context context,
cl_mem_flags flags,
@@ -1169,6 +1243,19 @@ error:
return err;
}
+cl_int /* the status returned by cl_kernel_set_arg_svm_pointer(), unless the error path is taken first */
+clSetKernelArgSVMPointer (cl_kernel kernel,
+ cl_uint arg_index,
+ const void *arg_value)
+{ /* API entry point: checks the kernel handle, then delegates all work to cl_kernel_set_arg_svm_pointer(). */
+ cl_int err = CL_SUCCESS;
+ CHECK_KERNEL(kernel); /* NOTE(review): presumably sets err and jumps to `error` for an invalid kernel -- confirm against the macro */
+
+ err = cl_kernel_set_arg_svm_pointer(kernel, arg_index, arg_value);
+error:
+ return err;
+}
+
cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
{
diff --git a/src/cl_context.c b/src/cl_context.c
index 229ab960..95ca5b0e 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -81,6 +81,19 @@ cl_context_add_mem(cl_context ctx, cl_mem mem) {
}
LOCAL void
+cl_context_add_svm(cl_context ctx, cl_mem mem) { /* Appends mem to ctx->svm_objects and records ctx as mem's owning context. */
+ assert(mem->ctx == NULL); /* mem must not already be attached to a context */
+ cl_context_add_ref(ctx); /* NOTE(review): presumably the reference mem holds on ctx -- confirm where it is dropped */
+
+ CL_OBJECT_LOCK(ctx); /* list insertion and counter update happen together under the context lock */
+ list_add_tail(&mem->base.node, &ctx->svm_objects);
+ ctx->svm_object_num++;
+ CL_OBJECT_UNLOCK(ctx);
+
+ mem->ctx = ctx; /* NOTE(review): no caller of this helper is visible in this change; cl_mem_svm_allocate() uses cl_context_add_mem() -- confirm intent */
+}
+
+LOCAL void
cl_context_remove_mem(cl_context ctx, cl_mem mem) {
assert(mem->ctx == ctx);
CL_OBJECT_LOCK(ctx);
@@ -452,3 +465,19 @@ unlock:
CL_OBJECT_RELEASE_OWNERSHIP(ctx);
return cl_kernel_dup(ker);
}
+
+/* Return the SVM memory object of ctx whose host pointer equals p, or NULL
+ * when there is none.
+ *
+ * Only objects on ctx->mem_objects that have a non-NULL host_ptr and a
+ * non-zero is_svm are candidates, and the comparison is an exact pointer
+ * match: a pointer into the middle of an allocation is not found.
+ *
+ * NOTE(review): the list is walked without taking the context lock here --
+ * confirm that callers serialise against list updates.
+ */
+cl_mem
+cl_context_get_svm_from_ptr(cl_context ctx, void * p)
+{
+  struct list_head *cursor;
+
+  list_for_each (cursor, (&ctx->mem_objects)) {
+    cl_mem candidate = (cl_mem)list_entry(cursor, _cl_base_object, node);
+
+    if (candidate->host_ptr != NULL &&
+        candidate->is_svm != 0 &&
+        candidate->host_ptr == p)
+      return candidate;
+  }
+
+  return NULL;
+}
diff --git a/src/cl_context.h b/src/cl_context.h
index b2903a70..f49212ba 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -107,6 +107,8 @@ struct _cl_context {
cl_uint queue_cookie; /* Cookie will change every time we change queue list. */
list_head mem_objects; /* All memory object currently allocated */
cl_uint mem_object_num; /* All memory number currently allocated */
+ list_head svm_objects; /* All svm object currently allocated */
+ cl_uint svm_object_num; /* All svm number currently allocated */
list_head samplers; /* All sampler object currently allocated */
cl_uint sampler_num; /* All sampler number currently allocated */
list_head events; /* All event object currently allocated */
@@ -186,5 +188,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
const char * str_kernel, size_t size, const char * str_option);
+/* Get the SVM from pointer, return NULL if pointer is not from SVM */
+extern cl_mem cl_context_get_svm_from_ptr(cl_context ctx, void *p);
#endif /* __CL_CONTEXT_H__ */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 7d56d90e..57a74fbd 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1052,6 +1052,7 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(PARTITION_TYPE, partition_type)
DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
+ DECL_FIELD(SVM_CAPABILITIES, svm_capabilities)
case CL_DEVICE_REFERENCE_COUNT:
{
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 8cd55bb5..6c62d0b6 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -54,7 +54,8 @@ struct _cl_device_id {
cl_uint native_vector_width_half;
cl_uint max_clock_frequency;
cl_uint address_bits;
- cl_ulong max_mem_alloc_size;
+ size_t max_mem_alloc_size;
+ cl_device_svm_capabilities svm_capabilities;
cl_bool image_support;
cl_uint max_read_image_args;
cl_uint max_write_image_args;
diff --git a/src/cl_driver.h b/src/cl_driver.h
index c431906a..a13ffd92 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -340,6 +340,12 @@ extern cl_buffer_alloc_cb *cl_buffer_alloc;
typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
+/* Pin a buffer at a caller-chosen GPU address (softpin). The Intel driver
+ * binds this to drm_intel_bo_set_softpin_offset, which returns an int status,
+ * so the callback type returns int as well. */
+typedef int (cl_buffer_set_softpin_offset_cb)(cl_buffer, uint64_t);
+extern cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset;
+
+/* Allow (non-zero) or forbid (zero) placing a buffer anywhere in the full GPU
+ * address range. The Intel driver binds this to
+ * drm_intel_bo_use_48b_address_range, which returns nothing, so the callback
+ * type returns void. */
+typedef void (cl_buffer_set_bo_use_full_range_cb)(cl_buffer, uint32_t);
+extern cl_buffer_set_bo_use_full_range_cb *cl_buffer_set_bo_use_full_range;
+
+
/* Set a buffer's tiling mode */
typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 96b2f77e..f5f5fe2c 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -33,6 +33,8 @@ LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
/* Buffer */
LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL;
+LOCAL cl_buffer_set_softpin_offset_cb *cl_buffer_set_softpin_offset = NULL;
+LOCAL cl_buffer_set_bo_use_full_range_cb *cl_buffer_set_bo_use_full_range = NULL;
LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL;
LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL;
LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 29cf5930..00b2dee9 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -282,8 +282,9 @@ cl_enqueue_map_buffer(enqueue_data *data, cl_int status)
cl_int err = CL_SUCCESS;
cl_mem mem = data->mem_obj;
assert(mem->type == CL_MEM_BUFFER_TYPE ||
- mem->type == CL_MEM_SUBBUFFER_TYPE);
- struct _cl_mem_buffer *buffer = (struct _cl_mem_buffer *)mem;
+ mem->type == CL_MEM_SUBBUFFER_TYPE ||
+ mem->type == CL_MEM_SVM_TYPE);
+ struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer *)mem;
if (status == CL_SUBMITTED) {
if (buffer->base.is_userptr) {
@@ -408,7 +409,8 @@ cl_enqueue_unmap_mem_object(enqueue_data *data, cl_int status)
if (memobj->flags & CL_MEM_USE_HOST_PTR) {
if (memobj->type == CL_MEM_BUFFER_TYPE ||
- memobj->type == CL_MEM_SUBBUFFER_TYPE) {
+ memobj->type == CL_MEM_SUBBUFFER_TYPE ||
+ memobj->type == CL_MEM_SVM_TYPE) {
assert(mapped_ptr >= memobj->host_ptr &&
mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
/* Sync the data. */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index d27c1adb..d11d1814 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -40,6 +40,7 @@
.native_vector_width_double = 2,
.native_vector_width_half = 8,
.address_bits = 32,
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
.image_support = CL_TRUE,
.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index b3f1e353..760fca85 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -253,11 +253,47 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
cl_mem_delete(k->args[index].mem);
k->args[index].mem = mem;
k->args[index].is_set = 1;
+ k->args[index].is_svm = mem->is_svm;
+ if(mem->is_svm)
+ k->args[index].ptr = mem->host_ptr;
k->args[index].local_sz = 0;
k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index);
return CL_SUCCESS;
}
+
+/* Bind an SVM pointer to argument `index` of kernel `k`.
+ *
+ * k:     kernel whose argument is being set.
+ * index: argument position; must be below k->arg_n.
+ * value: SVM pointer; it must be known to cl_context_get_svm_from_ptr() for
+ *        the kernel's program context.
+ *
+ * Returns CL_SUCCESS, CL_INVALID_ARG_INDEX for an out-of-range index, or
+ * CL_INVALID_ARG_VALUE when the argument is not a global/constant pointer or
+ * `value` is not a known SVM pointer.
+ *
+ * On success the argument takes a reference on the backing cl_mem and drops
+ * the reference on whatever cl_mem was bound before.
+ */
+LOCAL cl_int
+cl_kernel_set_arg_svm_pointer(cl_kernel k, cl_uint index, const void *value)
+{
+  enum gbe_arg_type arg_type; /* kind of argument */
+  cl_mem mem;
+
+  /* Validate the cheap things first, before searching the context. */
+  if (UNLIKELY(index >= k->arg_n))
+    return CL_INVALID_ARG_INDEX;
+
+  arg_type = interp_kernel_get_arg_type(k->opaque, index);
+  if (arg_type != GBE_ARG_GLOBAL_PTR && arg_type != GBE_ARG_CONSTANT_PTR)
+    return CL_INVALID_ARG_VALUE;
+
+  /* The lookup takes a non-const pointer but only compares it, so dropping
+   * the qualifier is safe; the cast makes that explicit. */
+  mem = cl_context_get_svm_from_ptr(k->program->ctx, (void *)value);
+  if (mem == NULL)
+    return CL_INVALID_ARG_VALUE;
+
+  /* Take the new reference before releasing the old one so that re-binding
+   * the same object never drops its last reference. */
+  cl_mem_add_ref(mem);
+  if (k->args[index].mem)
+    cl_mem_delete(k->args[index].mem);
+
+  k->args[index].ptr = (void *)value; /* cl_argument.ptr is a plain void * */
+  k->args[index].mem = mem;
+  k->args[index].is_set = 1;
+  k->args[index].is_svm = 1;
+  k->args[index].local_sz = 0;
+  k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index);
+  return CL_SUCCESS;
+}
+
+
LOCAL int
cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name,
size_t param_value_size, void *param_value, size_t *param_value_size_ret)
diff --git a/src/cl_kernel.h b/src/cl_kernel.h
index 0aa4a4f2..9ec49139 100644
--- a/src/cl_kernel.h
+++ b/src/cl_kernel.h
@@ -41,8 +41,10 @@ typedef struct cl_argument {
cl_sampler sampler; /* For sampler. */
cl_accelerator_intel accel;
unsigned char bti;
- uint32_t local_sz:31; /* For __local size specification */
+ void *ptr; /* SVM ptr value. */
+ uint32_t local_sz:30; /* For __local size specification */
uint32_t is_set:1; /* All args must be set before NDRange */
+ uint32_t is_svm:1; /* Indicate this argument is SVMPointer */
} cl_argument;
/* One OCL function */
@@ -108,6 +110,10 @@ extern int cl_kernel_set_arg(cl_kernel,
uint32_t arg_index,
size_t arg_size,
const void *arg_value);
+extern int cl_kernel_set_arg_svm_pointer(cl_kernel,
+ uint32_t arg_index,
+ const void *arg_value);
+
/* Get the argument information */
extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 333ffc95..712871b5 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -85,6 +85,7 @@ cl_get_mem_object_info(cl_mem mem,
FIELD_SIZE(MEM_CONTEXT, cl_context);
FIELD_SIZE(MEM_ASSOCIATED_MEMOBJECT, cl_mem);
FIELD_SIZE(MEM_OFFSET, size_t);
+ FIELD_SIZE(MEM_USES_SVM_POINTER, cl_bool);
default:
return CL_INVALID_VALUE;
}
@@ -133,6 +134,8 @@ cl_get_mem_object_info(cl_mem mem,
*((size_t *)param_value) = buf->sub_offset;
}
break;
+ case CL_MEM_USES_SVM_POINTER:
+ *((cl_uint *)param_value) = mem->is_svm;
}
return CL_SUCCESS;
@@ -269,6 +272,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->flags = flags;
mem->is_userptr = 0;
mem->offset = 0;
+ mem->is_svm = 0;
mem->cmrt_mem = NULL;
if (mem->type == CL_MEM_IMAGE_TYPE) {
cl_mem_image(mem)->is_image_from_buffer = 0;
@@ -293,6 +297,9 @@ cl_mem_allocate(enum cl_mem_type type,
if (type == CL_MEM_BUFFER_TYPE) {
if (flags & CL_MEM_USE_HOST_PTR) {
assert(host_ptr != NULL);
+ cl_mem svm_mem = NULL;
+ if((svm_mem = cl_context_get_svm_from_ptr(ctx, host_ptr)) != NULL)
+ mem->is_svm = 1;
/* userptr not support tiling */
if (!is_tiled) {
if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) &&
@@ -301,7 +308,13 @@ cl_mem_allocate(enum cl_mem_type type,
mem->offset = host_ptr - aligned_host_ptr;
mem->is_userptr = 1;
size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
- mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
+
+ if(svm_mem != NULL) {
+ mem->bo = svm_mem->bo;
+ cl_mem_add_ref(svm_mem);
+ } else
+ mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
+
bufCreated = 1;
}
}
@@ -614,6 +627,80 @@ void cl_mem_replace_buffer(cl_mem buffer, cl_buffer new_bo)
}
}
+/* Allocate a coarse-grain SVM buffer of `size` bytes in context `ctx`.
+ *
+ * flags:     SVM memory flags. CL_MEM_SVM_FINE_GRAIN_BUFFER and
+ *            CL_MEM_SVM_ATOMICS are rejected, and at most one of
+ *            CL_MEM_READ_WRITE / CL_MEM_WRITE_ONLY / CL_MEM_READ_ONLY may be
+ *            set.
+ * size:      must be non-zero and no larger than the device's
+ *            CL_DEVICE_MAX_MEM_ALLOC_SIZE.
+ * alignment: 0 or a power of two. 0 means one page; other values are rounded
+ *            up to a multiple of the page size.
+ *
+ * Returns the host pointer of the new allocation, or NULL on invalid
+ * arguments or allocation failure. When the library is built without
+ * HAS_BO_SET_SOFTPIN no allocation is attempted and NULL is returned.
+ *
+ * On success the backing cl_mem is registered with cl_context_add_mem(), and
+ * the buffer object is softpinned at the address of the host pointer.
+ */
+void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags,
+                          size_t size, unsigned int alignment)
+{
+  cl_int err = CL_SUCCESS;
+  size_t max_mem_size;
+  unsigned int access_modes = 0;
+  void *ptr = NULL; /* declared outside the #ifdef: it is returned on every path */
+
+  if(UNLIKELY(alignment & (alignment - 1)))
+    return NULL;
+
+  if ((err = cl_get_device_info(ctx->device,
+                                CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                                sizeof(max_mem_size),
+                                &max_mem_size,
+                                NULL)) != CL_SUCCESS) {
+    return NULL;
+  }
+
+  if(UNLIKELY(size == 0 || size > max_mem_size)) {
+    return NULL;
+  }
+
+  /* Only coarse-grain buffers are supported. */
+  if (flags & (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS)) {
+    return NULL;
+  }
+
+  /* The three access qualifiers are mutually exclusive. */
+  if (flags & CL_MEM_READ_WRITE)
+    access_modes++;
+  if (flags & CL_MEM_WRITE_ONLY)
+    access_modes++;
+  if (flags & CL_MEM_READ_ONLY)
+    access_modes++;
+  if (access_modes > 1) {
+    return NULL;
+  }
+
+#ifdef HAS_BO_SET_SOFTPIN
+  cl_buffer_mgr bufmgr = NULL;
+  cl_buffer bo = NULL;
+  cl_mem mem;
+  _cl_mem_svm* svm;
+
+  bufmgr = cl_context_get_bufmgr(ctx);
+  assert(bufmgr);
+
+  int page_size = getpagesize();
+  const size_t alignedSZ = ALIGN(size, page_size);
+  if(alignment == 0)
+    alignment = page_size;
+  else
+    alignment = ALIGN(alignment, page_size);
+
+  /* Acquire the resources in an order that lets every failure path undo
+   * exactly what was acquired before it. */
+  ptr = cl_aligned_malloc(alignedSZ, alignment);
+  if(UNLIKELY(ptr == NULL))
+    return NULL;
+
+  bo = cl_buffer_alloc_userptr(bufmgr, "CL SVM memory object", ptr, alignedSZ, 0);
+  if(UNLIKELY(bo == NULL)) {
+    cl_free(ptr);
+    return NULL;
+  }
+
+  if(UNLIKELY((svm = CALLOC(_cl_mem_svm)) == NULL)) {
+    cl_buffer_unreference(bo);
+    cl_free(ptr);
+    return NULL;
+  }
+  mem = &svm->base;
+
+  mem->type = CL_MEM_SVM_TYPE;
+  CL_OBJECT_INIT_BASE(mem, CL_OBJECT_MEM_MAGIC);
+  svm->flags = flags; /* keep the caller's flags as given */
+  mem->flags = flags | CL_MEM_USE_HOST_PTR;
+  mem->offset = 0;
+  mem->host_ptr = ptr;
+  mem->is_svm = 1;
+  mem->is_userptr = 1;
+  mem->bo = bo;
+  mem->size = size;
+
+  /* Give the buffer the same address on the GPU as it has on the CPU. */
+  cl_buffer_set_softpin_offset(mem->bo, (size_t)ptr);
+  cl_buffer_set_bo_use_full_range(mem->bo, 1);
+
+  /* Append the svm in the context buffer list */
+  cl_context_add_mem(ctx, mem);
+#endif
+
+  return ptr;
+}
+
+
void
cl_mem_copy_image_region(const size_t *origin, const size_t *region,
void *dst, size_t dst_row_pitch, size_t dst_slice_pitch,
@@ -1166,6 +1253,18 @@ cl_mem_new_image(cl_context context,
}
+/* Release the SVM allocation registered in ctx for svm_pointer.
+ * A NULL pointer, or one that cl_context_get_svm_from_ptr() does not know,
+ * is ignored. */
LOCAL void
+cl_mem_svm_delete(cl_context ctx, void *svm_pointer)
+{
+  cl_mem svm_mem = NULL;
+
+  if(svm_pointer != NULL)
+    svm_mem = cl_context_get_svm_from_ptr(ctx, svm_pointer);
+
+  if(svm_mem != NULL)
+    cl_mem_delete(svm_mem);
+}
+
+LOCAL void
cl_mem_delete(cl_mem mem)
{
cl_int i;
@@ -1198,6 +1297,11 @@ cl_mem_delete(cl_mem mem)
}
}
+ if(mem->is_svm && mem->type != CL_MEM_SVM_TYPE) {
+ cl_mem svm_mem = cl_context_get_svm_from_ptr(mem->ctx, mem->host_ptr);
+ if(svm_mem)
+ cl_mem_delete(svm_mem);
+ }
/* Remove it from the list */
cl_context_remove_mem(mem->ctx, mem);
@@ -1244,9 +1348,10 @@ cl_mem_delete(cl_mem mem)
cl_buffer_unreference(mem->bo);
}
- if (mem->is_userptr &&
+ if ((mem->is_userptr &&
(mem->flags & CL_MEM_ALLOC_HOST_PTR) &&
- (mem->type != CL_MEM_SUBBUFFER_TYPE))
+ (mem->type != CL_MEM_SUBBUFFER_TYPE)) ||
+ (mem->is_svm && mem->type == CL_MEM_SVM_TYPE))
cl_free(mem->host_ptr);
CL_OBJECT_DESTROY_BASE(mem);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 82f30f6a..4a71a8f7 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -73,6 +73,7 @@ typedef struct _cl_mem_dstr_cb {
enum cl_mem_type {
CL_MEM_BUFFER_TYPE,
CL_MEM_SUBBUFFER_TYPE,
+ CL_MEM_SVM_TYPE,
CL_MEM_IMAGE_TYPE,
CL_MEM_GL_IMAGE_TYPE,
CL_MEM_BUFFER1D_IMAGE_TYPE
@@ -93,7 +94,8 @@ typedef struct _cl_mem {
int map_ref; /* The mapped count. */
uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
- uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
+ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled */
+ cl_bool is_svm; /* This object is svm */
size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
uint8_t cmrt_mem_type; /* CmBuffer, CmSurface2D, ... */
@@ -113,6 +115,11 @@ typedef struct _cl_mem {
CL_OBJECT_GET_REF(mem) >= 1 && \
mem->type < CL_MEM_IMAGE_TYPE))
+typedef struct _cl_mem_svm { /* Memory object created by cl_mem_svm_allocate() with type CL_MEM_SVM_TYPE. */
+ _cl_mem base; /* Common cl_mem state; the allocator hands out &svm->base as the cl_mem */
+ cl_svm_mem_flags flags; /* Flags specified at the creation time */
+} _cl_mem_svm;
+
struct _cl_mem_image {
_cl_mem base;
cl_image_format fmt; /* only for images */
@@ -212,6 +219,10 @@ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*
/* Create a new sub memory object */
extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *);
+void* cl_mem_svm_allocate(cl_context, cl_svm_mem_flags, size_t, unsigned int);
+void cl_mem_svm_delete(cl_context, void *svm_pointer);
+
+
/* Idem but this is an image */
extern cl_mem
cl_mem_new_image(cl_context context,
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index cf8f8292..d1796a5b 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -995,6 +995,10 @@ intel_setup_callbacks(void)
cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info;
cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
+#ifdef HAS_BO_SET_SOFTPIN
+ cl_buffer_set_softpin_offset = (cl_buffer_set_softpin_offset_cb *) drm_intel_bo_set_softpin_offset;
+ cl_buffer_set_bo_use_full_range = (cl_buffer_set_bo_use_full_range_cb *) drm_intel_bo_use_48b_address_range;
+#endif
cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
#if defined(HAS_GL_EGL)
cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;