summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRebecca N. Palmer <rebecca_palmer@zoho.com>2018-07-21 20:12:48 +0100
committerYang Rong <rong.r.yang@intel.com>2018-08-20 15:32:16 +0800
commitab45f14f1e552a5d8b300b2bf5b7bdbed525110c (patch)
tree2dbd62d56f96a89a4ab696cd0b4ac6966574b98e
parente1b2419a0008e38ef2d9d255d9e9c74e9fba084b (diff)
Make in-order command queues actually be in-order
When beignet added out-of-order execution support (7fd45f15), it made *all* command queues out-of-order, even if they were created as (and are reported by clGetCommandQueueInfo as) in-order. Signed-off-by: Rebecca N. Palmer <rebecca_palmer@zoho.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r--src/cl_api.c9
-rw-r--r--src/cl_api_kernel.c3
-rw-r--r--src/cl_api_mem.c58
-rw-r--r--src/cl_command_queue.h5
-rw-r--r--src/cl_command_queue_enqueue.c26
-rw-r--r--src/cl_gl_api.c4
6 files changed, 71 insertions, 34 deletions
diff --git a/src/cl_api.c b/src/cl_api.c
index 036ae172..197627a1 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -283,7 +283,7 @@ clEnqueueSVMFree (cl_command_queue command_queue,
data->size = num_svm_pointers;
data->ptr = user_data;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -429,7 +429,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
data->const_ptr = src_ptr;
data->size = size;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -441,6 +441,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_copy) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while(0);
@@ -518,7 +521,7 @@ cl_int clEnqueueSVMMemFill (cl_command_queue command_queue,
data->pattern_size = pattern_size;
data->size = size;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
index 13ea8c07..ce4ff8bc 100644
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
count *= global_wk_sz_rem[2] ? 2 : 1;
const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
+ cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue);
/* Go through the at most 8 cases and euque if there is work items left */
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++) {
@@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
+ err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c
index 1daf4039..13282748 100644
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -309,7 +309,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
data->write_map = 1;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -322,6 +322,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_map) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
ptr = data->ptr;
@@ -469,7 +472,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
data->mem_obj = memobj;
data->ptr = mapped_ptr;
- if (e_status == CL_COMPLETE) { // No need to wait
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
break;
@@ -571,7 +574,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
data->offset = offset;
data->size = size;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -583,6 +586,9 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_read) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -674,7 +680,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
data->offset = offset;
data->size = size;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -686,6 +692,9 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_write) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -823,7 +832,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
data->host_row_pitch = host_row_pitch;
data->host_slice_pitch = host_slice_pitch;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -835,6 +844,9 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_read) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -974,7 +986,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
data->host_row_pitch = host_row_pitch;
data->host_slice_pitch = host_slice_pitch;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -986,6 +998,9 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_write) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -1093,7 +1108,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -1283,7 +1298,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue,
if (e_status < CL_COMPLETE) { // Error happend, cancel.
err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
break;
- } else if (e_status == CL_COMPLETE) {
+ } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
@@ -1384,7 +1399,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -1471,7 +1486,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -1764,7 +1779,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
data->write_map = 1;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -1777,6 +1792,9 @@ clEnqueueMapImage(cl_command_queue command_queue,
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_map) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
ptr = data->ptr;
@@ -2014,7 +2032,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
data->row_pitch = row_pitch;
data->slice_pitch = slice_pitch;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -2026,6 +2044,9 @@ clEnqueueReadImage(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_read) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -2218,7 +2239,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
data->row_pitch = row_pitch;
data->slice_pitch = slice_pitch;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -2230,6 +2251,9 @@ clEnqueueWriteImage(cl_command_queue command_queue,
break;
}
cl_command_queue_enqueue_event(command_queue, e);
+ if (blocking_write) {
+ cl_event_wait_for_events_list(1, &e);
+ }
}
} while (0);
@@ -2364,7 +2388,7 @@ clEnqueueCopyImage(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -2475,7 +2499,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -2587,7 +2611,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
@@ -2697,7 +2721,7 @@ clEnqueueFillImage(cl_command_queue command_queue,
break;
}
- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
if (err != CL_SUCCESS) {
break;
}
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 9f6ff390..8640cf6f 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_finish(cl_command_queue queue);
extern cl_int cl_command_queue_wait_flush(cl_command_queue queue);
/* Note: Must call this function with queue's lock. */
extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num);
+/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */
+static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){
+ return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */
+ || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */
+}
#endif /* __CL_COMMAND_QUEUE_H__ */
diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c
index 44a07615..8166d82b 100644
--- a/src/cl_command_queue_enqueue.c
+++ b/src/cl_command_queue_enqueue.c
@@ -65,6 +65,8 @@ worker_thread_function(void *Arg)
if (cl_event_is_ready(e) <= CL_COMPLETE) {
list_node_del(&e->enqueue_node);
list_add_tail(&ready_list, &e->enqueue_node);
+ } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){
+ break; /* in in-order mode, can't skip over non-ready events */
}
}
@@ -80,18 +82,20 @@ worker_thread_function(void *Arg)
CL_OBJECT_UNLOCK(queue);
/* Do the really job without lock.*/
- exec_status = CL_SUBMITTED;
- list_for_each_safe(pos, n, &ready_list)
- {
- e = list_entry(pos, _cl_event, enqueue_node);
- cl_event_exec(e, exec_status, CL_FALSE);
- }
+ if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* in in-order mode, need to get each all the way to CL_COMPLETE before starting the next one */
+ exec_status = CL_SUBMITTED;
+ list_for_each_safe(pos, n, &ready_list)
+ {
+ e = list_entry(pos, _cl_event, enqueue_node);
+ cl_event_exec(e, exec_status, CL_FALSE);
+ }
- /* Notify all waiting for flush. */
- CL_OBJECT_LOCK(queue);
- worker->in_exec_status = CL_SUBMITTED;
- CL_OBJECT_NOTIFY_COND(queue);
- CL_OBJECT_UNLOCK(queue);
+ /* Notify all waiting for flush. */
+ CL_OBJECT_LOCK(queue);
+ worker->in_exec_status = CL_SUBMITTED;
+ CL_OBJECT_NOTIFY_COND(queue);
+ CL_OBJECT_UNLOCK(queue);
+ }
list_for_each_safe(pos, n, &ready_list)
{
diff --git a/src/cl_gl_api.c b/src/cl_gl_api.c
index 61b3ab80..ec584030 100644
--- a/src/cl_gl_api.c
+++ b/src/cl_gl_api.c
@@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue,
data = &e->exec_data;
data->type = EnqueueReturnSuccesss;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {
@@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue,
data = &e->exec_data;
data->type = EnqueueReturnSuccesss;
- if (e_status == CL_COMPLETE) {
+ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
// Sync mode, no need to queue event.
err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
if (err != CL_SUCCESS) {