summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>2020-09-30 13:07:43 +0200
committerEric Engestrom <eric@engestrom.ch>2020-10-14 19:29:28 +0200
commitebb438a3d212e88e4bd643b6e6c2a4174f130f56 (patch)
tree8d89fde0e723acf80e716c51060af67d6429a379
parentf6dfeec5f40480263b70847082237fbe724b4ebc (diff)
radv: Use atomics to read query results.
The volatile pattern gives me flaky results for 32-bit builds on ChromeOS Android. This is because on 32-bit the volatile 64-bit loads gets split into 2 32-bit loads each. So if we read the lower dword first and then the upper dword, it can happen that the upper dword is already changed but the lower dword isn't yet. In particular for occlusion queries this gives false readings, as the upper dword commonly only constains the ready bit. With the GCC atomic intrinsics we get a call to __atomic_load_8 in libatomic.so which does the right thing. An alternative fix would be to explicitly split the 32-bit loads in the right order and do a bunch of retries if things change, though that gets messy quickly and for 32-bit builds only doesn't feel worth it that much. CC: mesa-stable Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6933> (cherry picked from commit 7568c97df14f8702efcc5691cd8c2fff8f9bff49)
-rw-r--r--.pick_status.json2
-rw-r--r--src/amd/vulkan/radv_query.c43
2 files changed, 24 insertions, 21 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 0007c7c6a2d..96703886ccc 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -8833,7 +8833,7 @@
"description": "radv: Use atomics to read query results.",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": null
},
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index 07a1c387187..b2fb12ff66a 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -34,6 +34,7 @@
#include "radv_private.h"
#include "radv_cs.h"
#include "sid.h"
+#include "util/u_atomic.h"
#define TIMESTAMP_NOT_READY UINT64_MAX
@@ -1367,31 +1368,32 @@ VkResult radv_GetQueryPoolResults(
switch (pool->type) {
case VK_QUERY_TYPE_TIMESTAMP: {
- volatile uint64_t const *src64 = (volatile uint64_t const *)src;
- available = *src64 != TIMESTAMP_NOT_READY;
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint64_t value;
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- while (*src64 == TIMESTAMP_NOT_READY)
- ;
- available = true;
- }
+ do {
+ value = p_atomic_read(src64);
+ } while (value == TIMESTAMP_NOT_READY &&
+ (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ available = value != TIMESTAMP_NOT_READY;
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
if (flags & VK_QUERY_RESULT_64_BIT) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t*)dest = *src64;
+ *(uint64_t*)dest = value;
dest += 8;
} else {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t*)dest = *(volatile uint32_t*)src;
+ *(uint32_t*)dest = (uint32_t)value;
dest += 4;
}
break;
}
case VK_QUERY_TYPE_OCCLUSION: {
- volatile uint64_t const *src64 = (volatile uint64_t const *)src;
+ uint64_t const *src64 = (uint64_t const *)src;
uint32_t db_count = device->physical_device->rad_info.num_render_backends;
uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
uint64_t sample_count = 0;
@@ -1404,8 +1406,8 @@ VkResult radv_GetQueryPoolResults(
continue;
do {
- start = src64[2 * i];
- end = src64[2 * i + 1];
+ start = p_atomic_read(src64 + 2 * i);
+ end = p_atomic_read(src64 + 2 * i + 1);
} while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
@@ -1430,16 +1432,17 @@ VkResult radv_GetQueryPoolResults(
break;
}
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
- ;
- available = *(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+ const uint32_t *avail_ptr = (const uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+
+ do {
+ available = p_atomic_read(avail_ptr);
+ } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
- const volatile uint64_t *start = (uint64_t*)src;
- const volatile uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
+ const uint64_t *start = (uint64_t*)src;
+ const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
if (flags & VK_QUERY_RESULT_64_BIT) {
uint64_t *dst = (uint64_t*)dest;
dest += util_bitcount(pool->pipeline_stats_mask) * 8;
@@ -1467,7 +1470,7 @@ VkResult radv_GetQueryPoolResults(
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
- volatile uint64_t const *src64 = (volatile uint64_t const *)src;
+ uint64_t const *src64 = (uint64_t const *)src;
uint64_t num_primitives_written;
uint64_t primitive_storage_needed;
@@ -1479,7 +1482,7 @@ VkResult radv_GetQueryPoolResults(
*/
available = 1;
for (int j = 0; j < 4; j++) {
- if (!(src64[j] & 0x8000000000000000UL))
+ if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
available = 0;
}