summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2017-11-09 14:00:22 +0100
committerNicolai Hähnle <nicolai.haehnle@amd.com>2017-11-09 14:00:22 +0100
commite6dbc804a87aef34db138c607ba435d701703bc6 (patch)
tree72886d32739f09c9fdb41dce690b040994671f0c
parent1e5c9cf5902e31d3e038c565588527be35434306 (diff)
winsys/amdgpu: handle cs_add_fence_dependency for deferred/unsubmitted fences
The idea is to fix the following interleaving of operations that can arise from deferred fences: Thread 1 / Context 1 Thread 2 / Context 2 -------------------- -------------------- f = deferred flush <------- application-side synchronization -------> fence_server_sync(f) ... flush() flush() We will now stall in fence_server_sync until the flush of context 1 has completed. This scenario was unlikely to occur previously, because applications seem to be doing Thread 1 / Context 1 Thread 2 / Context 2 -------------------- -------------------- f = glFenceSync() glFlush() <------- application-side synchronization -------> glWaitSync(f) ... and indeed they probably *have* to use this ordering to avoid deadlocks in the GLX model, where all GL operations conceptually go through a single connection to the X server. However, it's less clear whether applications have to do this with other WSI (i.e. EGL). Besides, even this sequence of GL commands can be translated into the Gallium-level sequence outlined above when Gallium threading and asynchronous flushes are used. So it makes sense to be more robust. As a side effect, we no longer busy-wait on submission_in_progress. We won't enable asynchronous flushes on radeon, but add a cs_add_fence_dependency stub anyway to document the potential issue. Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h4
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.c21
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.h9
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_cs.c19
4 files changed, 41 insertions, 12 deletions
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 2d3f646dc65..e8c486cb7f4 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -543,7 +543,9 @@ struct radeon_winsys {
/**
* Create a fence before the CS is flushed.
* The user must flush manually to complete the initializaton of the fence.
- * The fence must not be used before the flush.
+ *
+ * The fence must not be used for anything except \ref cs_add_fence_dependency
+ * before the flush.
*/
struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_winsys_cs *cs);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0a3f0fd6016..9e10ead9999 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -50,7 +50,8 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
fence->fence.ip_type = ip_type;
fence->fence.ip_instance = ip_instance;
fence->fence.ring = ring;
- fence->submission_in_progress = true;
+ util_queue_fence_init(&fence->submitted);
+ util_queue_fence_reset(&fence->submitted);
p_atomic_inc(&ctx->refcount);
return (struct pipe_fence_handle *)fence;
}
@@ -81,6 +82,9 @@ amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
FREE(fence);
return NULL;
}
+
+ util_queue_fence_init(&fence->submitted);
+
return (struct pipe_fence_handle*)fence;
}
@@ -98,7 +102,7 @@ static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
return r ? -1 : fd;
}
- os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE);
+ util_queue_fence_wait(&fence->submitted);
/* Convert the amdgpu fence into a fence FD. */
int fd;
@@ -118,7 +122,7 @@ static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
rfence->fence.fence = seq_no;
rfence->user_fence_cpu_address = user_fence_cpu_address;
- rfence->submission_in_progress = false;
+ util_queue_fence_signal(&rfence->submitted);
}
static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
@@ -126,7 +130,7 @@ static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
rfence->signalled = true;
- rfence->submission_in_progress = false;
+ util_queue_fence_signal(&rfence->submitted);
}
bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
@@ -164,8 +168,7 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
/* The fence might not have a number assigned if its IB is being
* submitted in the other thread right now. Wait until the submission
* is done. */
- if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
- abs_timeout))
+ if (!util_queue_fence_wait_timeout(&rfence->submitted, abs_timeout))
return false;
user_fence_cpu = rfence->user_fence_cpu_address;
@@ -1029,6 +1032,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws,
struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
+ util_queue_fence_wait(&fence->submitted);
+
if (is_noop_fence_dependency(acs, fence))
return;
@@ -1304,7 +1309,7 @@ bo_list_error:
continue;
}
- assert(!fence->submission_in_progress);
+ assert(util_queue_fence_is_signalled(&fence->submitted));
amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
}
@@ -1327,7 +1332,7 @@ bo_list_error:
if (!amdgpu_fence_is_syncobj(fence))
continue;
- assert(!fence->submission_in_progress);
+ assert(util_queue_fence_is_signalled(&fence->submitted));
sem_chunk[num++].handle = fence->syncobj;
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index b7bc9a20bbf..fbf44b36610 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -144,9 +144,11 @@ struct amdgpu_fence {
struct amdgpu_cs_fence fence;
uint64_t *user_fence_cpu_address;
- /* If the fence is unknown due to an IB still being submitted
- * in the other thread. */
- volatile int submission_in_progress; /* bool (int for atomicity) */
+ /* If the fence has been submitted. This is unsignalled for deferred fences
+ * (cs->next_fence) and while an IB is still being submitted in the submit
+ * thread. */
+ struct util_queue_fence submitted;
+
volatile int signalled; /* bool (int for atomicity) */
};
@@ -178,6 +180,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
else
amdgpu_ctx_unref(fence->ctx);
+ util_queue_fence_destroy(&fence->submitted);
FREE(fence);
}
*rdst = rsrc;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 7220f3a0240..add88f80aae 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -786,6 +786,24 @@ radeon_drm_cs_get_next_fence(struct radeon_winsys_cs *rcs)
return fence;
}
+static void
+radeon_drm_cs_add_fence_dependency(struct radeon_winsys_cs *cs,
+ struct pipe_fence_handle *fence)
+{
+ /* TODO: Handle the following unlikely multi-threaded scenario:
+ *
+ * Thread 1 / Context 1 Thread 2 / Context 2
+ * -------------------- --------------------
+ * f = cs_get_next_fence()
+ * cs_add_fence_dependency(f)
+ * cs_flush()
+ * cs_flush()
+ *
+ * We currently assume that this does not happen because we don't support
+ * asynchronous flushes on Radeon.
+ */
+}
+
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
ws->base.ctx_create = radeon_drm_ctx_create;
@@ -801,6 +819,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
+ ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency;
ws->base.fence_wait = radeon_fence_wait;
ws->base.fence_reference = radeon_fence_reference;
}